ray/release/long_running_tests/long_running_tests.yaml
mwtian 97f7e3d0e6
[e2e] do not terminate in serve_failure smoke test (#21925)
When the script terminates, it also terminates its cluster, including the dashboard, which prevents subsequent job submissions. Other long-running e2e tests do not terminate in smoke-test mode, so make `serve_failure` behave the same.
2022-01-27 15:36:46 -08:00

194 lines
3.3 KiB
YAML

# Test entry "actor_deaths" (team: core); runs workloads/actor_deaths.py.
- name: actor_deaths
  team: core
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml
  run:
    # 86400 = 24 h if the unit is seconds -- TODO confirm against the runner.
    timeout: 86400
    prepare: ray stop
    script: python workloads/actor_deaths.py
    # NOTE(review): placed under `run` when the nesting was reconstructed;
    # confirm this key belongs here and not at the entry level.
    long_running: true
  # Smoke-test settings: only a shorter run timeout (3600 vs 86400).
  smoke_test:
    run:
      timeout: 3600
# Test entry "apex" (team: ml); runs workloads/apex.py.
- name: apex
  team: ml
  cluster:
    # App config is taken from the sibling rllib_tests directory.
    app_config: ../rllib_tests/app_config.yaml
    compute_template: tpl_cpu_3.yaml
  run:
    # 86400 = 24 h if the unit is seconds -- TODO confirm against the runner.
    timeout: 86400
    # Presumably waits for 3 nodes with a 600 s limit -- verify against
    # wait_cluster.py, which is not visible here.
    prepare: python wait_cluster.py 3 600
    script: python workloads/apex.py
    # NOTE(review): placed under `run` when the nesting was reconstructed;
    # confirm this key belongs here and not at the entry level.
    long_running: true
  # Smoke-test settings: only a shorter run timeout (3600 vs 86400).
  smoke_test:
    run:
      timeout: 3600
# Test entry "impala" (team: ml); runs workloads/impala.py.
# Unlike the other entries in this file, it defines no `prepare` command.
- name: impala
  team: ml
  cluster:
    app_config: app_config_np.yaml
    compute_template: tpl_cpu_1_large.yaml
  run:
    # 86400 = 24 h if the unit is seconds -- TODO confirm against the runner.
    timeout: 86400
    script: python workloads/impala.py
    # NOTE(review): placed under `run` when the nesting was reconstructed;
    # confirm this key belongs here and not at the entry level.
    long_running: true
  # Smoke-test settings: only a shorter run timeout (3600 vs 86400).
  smoke_test:
    run:
      timeout: 3600
# Test entry "many_actor_tasks" (team: core); runs
# workloads/many_actor_tasks.py.
- name: many_actor_tasks
  team: core
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml
  run:
    # 86400 = 24 h if the unit is seconds -- TODO confirm against the runner.
    timeout: 86400
    prepare: ray stop
    script: python workloads/many_actor_tasks.py
    # NOTE(review): placed under `run` when the nesting was reconstructed;
    # confirm this key belongs here and not at the entry level.
    long_running: true
  # Smoke-test settings: only a shorter run timeout (3600 vs 86400).
  smoke_test:
    run:
      timeout: 3600
# Test entry "many_drivers" (team: core); runs workloads/many_drivers.py.
- name: many_drivers
  team: core
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml
  run:
    # 86400 = 24 h if the unit is seconds -- TODO confirm against the runner.
    timeout: 86400
    prepare: ray stop
    script: python workloads/many_drivers.py
    # NOTE(review): placed under `run` when the nesting was reconstructed;
    # confirm this key belongs here and not at the entry level.
    long_running: true
  # Smoke-test settings: only a shorter run timeout (3600 vs 86400).
  smoke_test:
    run:
      timeout: 3600
# Test entry "many_ppo" (team: ml); runs workloads/many_ppo.py.
- name: many_ppo
  team: ml
  cluster:
    # App config is taken from the sibling rllib_tests directory.
    app_config: ../rllib_tests/app_config.yaml
    compute_template: many_ppo.yaml
  run:
    # 86400 = 24 h if the unit is seconds -- TODO confirm against the runner.
    timeout: 86400
    # Presumably waits for 1 node with a 600 s limit -- verify against
    # wait_cluster.py, which is not visible here.
    prepare: python wait_cluster.py 1 600
    script: python workloads/many_ppo.py
    # NOTE(review): placed under `run` when the nesting was reconstructed;
    # confirm this key belongs here and not at the entry level.
    long_running: true
  # Smoke-test settings: only a shorter run timeout (3600 vs 86400).
  smoke_test:
    run:
      timeout: 3600
# Test entry "many_tasks" (team: core); runs workloads/many_tasks.py.
- name: many_tasks
  team: core
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml
  run:
    # 86400 = 24 h if the unit is seconds -- TODO confirm against the runner.
    timeout: 86400
    prepare: ray stop
    script: python workloads/many_tasks.py
    # NOTE(review): placed under `run` when the nesting was reconstructed;
    # confirm this key belongs here and not at the entry level.
    long_running: true
  # Smoke-test settings: only a shorter run timeout (3600 vs 86400).
  smoke_test:
    run:
      timeout: 3600
# Test entry "many_tasks_serialized_ids" (team: core); runs
# workloads/many_tasks_serialized_ids.py.
- name: many_tasks_serialized_ids
  team: core
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml
  run:
    # 86400 = 24 h if the unit is seconds -- TODO confirm against the runner.
    timeout: 86400
    prepare: ray stop
    script: python workloads/many_tasks_serialized_ids.py
    # NOTE(review): placed under `run` when the nesting was reconstructed;
    # confirm this key belongs here and not at the entry level.
    long_running: true
  # Smoke-test settings: only a shorter run timeout (3600 vs 86400).
  smoke_test:
    run:
      timeout: 3600
# Test entry "node_failures" (team: core); runs workloads/node_failures.py.
- name: node_failures
  team: core
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml
  run:
    # 86400 = 24 h if the unit is seconds -- TODO confirm against the runner.
    timeout: 86400
    prepare: ray stop
    script: python workloads/node_failures.py
    # NOTE(review): placed under `run` when the nesting was reconstructed;
    # confirm this key belongs here and not at the entry level.
    long_running: true
  # Smoke-test settings: only a shorter run timeout (3600 vs 86400).
  smoke_test:
    run:
      timeout: 3600
# Test entry "pbt" (team: ml); runs workloads/pbt.py.
- name: pbt
  team: ml
  cluster:
    # App config is taken from the sibling rllib_tests directory.
    app_config: ../rllib_tests/app_config.yaml
    compute_template: tpl_cpu_1.yaml
  run:
    # 86400 = 24 h if the unit is seconds -- TODO confirm against the runner.
    timeout: 86400
    prepare: ray stop
    script: python workloads/pbt.py
    # NOTE(review): placed under `run` when the nesting was reconstructed;
    # confirm this key belongs here and not at the entry level.
    long_running: true
  # Smoke-test settings: only a shorter run timeout (3600 vs 86400).
  smoke_test:
    run:
      timeout: 3600
# Test entry "serve" (team: serve); runs workloads/serve.py.
- name: serve
  team: serve
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml
  run:
    # 86400 = 24 h if the unit is seconds -- TODO confirm against the runner.
    timeout: 86400
    prepare: ray stop
    script: python workloads/serve.py
    # NOTE(review): placed under `run` when the nesting was reconstructed;
    # confirm this key belongs here and not at the entry level.
    long_running: true
  # Smoke-test settings: only a shorter run timeout (3600 vs 86400).
  smoke_test:
    run:
      timeout: 3600
# Test entry "serve_failure" (team: serve); runs workloads/serve_failure.py.
- name: serve_failure
  team: serve
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml
  run:
    # 86400 = 24 h if the unit is seconds -- TODO confirm against the runner.
    timeout: 86400
    prepare: ray stop
    script: python workloads/serve_failure.py
    # NOTE(review): placed under `run` when the nesting was reconstructed;
    # confirm this key belongs here and not at the entry level.
    long_running: true
  # Smoke-test settings: run timeout is 600 here, shorter than the 3600 used
  # by the other entries in this file. Kept as-is.
  smoke_test:
    run:
      timeout: 600