mirror of
https://github.com/vale981/ray
synced 2025-03-08 19:41:38 -05:00

When the script terminates, it also terminates its cluster, including the dashboard, which prevents subsequent job submissions. Other long-running e2e tests do not terminate in smoke test mode, so make `serve_failure` behave the same.
194 lines
3.3 KiB
YAML
194 lines
3.3 KiB
YAML
# Test entry "actor_deaths" (team: core), flagged long_running.
# NOTE(review): nesting reconstructed from a flattened listing; timeouts are
# presumably in seconds (86400 = 24 h, 3600 = 1 h) — confirm against the runner.
- name: actor_deaths
  team: core
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml

  run:
    timeout: 86400
    prepare: ray stop
    script: python workloads/actor_deaths.py
    long_running: True

  # Presumably overrides run.timeout when executed as a smoke test — confirm.
  smoke_test:
    run:
      timeout: 3600

# Test entry "apex" (team: ml), flagged long_running.
# NOTE(review): nesting reconstructed from a flattened listing; timeouts are
# presumably in seconds (86400 = 24 h, 3600 = 1 h) — confirm against the runner.
- name: apex
  team: ml
  cluster:
    app_config: ../rllib_tests/app_config.yaml
    compute_template: tpl_cpu_3.yaml

  run:
    timeout: 86400
    # NOTE(review): arguments "3 600" look like <node count> <wait timeout>;
    # verify against wait_cluster.py.
    prepare: python wait_cluster.py 3 600
    script: python workloads/apex.py
    long_running: True

  # Presumably overrides run.timeout when executed as a smoke test — confirm.
  smoke_test:
    run:
      timeout: 3600


# Test entry "impala" (team: ml), flagged long_running.
# NOTE(review): nesting reconstructed from a flattened listing; timeouts are
# presumably in seconds (86400 = 24 h, 3600 = 1 h) — confirm against the runner.
- name: impala
  team: ml
  cluster:
    app_config: app_config_np.yaml
    compute_template: tpl_cpu_1_large.yaml

  run:
    timeout: 86400
    # NOTE(review): unlike the other entries in this list, no `prepare` step is
    # given here — confirm that is intentional.
    script: python workloads/impala.py
    long_running: True

  # Presumably overrides run.timeout when executed as a smoke test — confirm.
  smoke_test:
    run:
      timeout: 3600

# Test entry "many_actor_tasks" (team: core), flagged long_running.
# NOTE(review): nesting reconstructed from a flattened listing; timeouts are
# presumably in seconds (86400 = 24 h, 3600 = 1 h) — confirm against the runner.
- name: many_actor_tasks
  team: core
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml

  run:
    timeout: 86400
    prepare: ray stop
    script: python workloads/many_actor_tasks.py
    long_running: True

  # Presumably overrides run.timeout when executed as a smoke test — confirm.
  smoke_test:
    run:
      timeout: 3600


# Test entry "many_drivers" (team: core), flagged long_running.
# NOTE(review): nesting reconstructed from a flattened listing; timeouts are
# presumably in seconds (86400 = 24 h, 3600 = 1 h) — confirm against the runner.
- name: many_drivers
  team: core
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml

  run:
    timeout: 86400
    prepare: ray stop
    script: python workloads/many_drivers.py
    long_running: True

  # Presumably overrides run.timeout when executed as a smoke test — confirm.
  smoke_test:
    run:
      timeout: 3600


# Test entry "many_ppo" (team: ml), flagged long_running.
# NOTE(review): nesting reconstructed from a flattened listing; timeouts are
# presumably in seconds (86400 = 24 h, 3600 = 1 h) — confirm against the runner.
- name: many_ppo
  team: ml
  cluster:
    app_config: ../rllib_tests/app_config.yaml
    compute_template: many_ppo.yaml

  run:
    timeout: 86400
    # NOTE(review): arguments "1 600" look like <node count> <wait timeout>;
    # verify against wait_cluster.py.
    prepare: python wait_cluster.py 1 600
    script: python workloads/many_ppo.py
    long_running: True

  # Presumably overrides run.timeout when executed as a smoke test — confirm.
  smoke_test:
    run:
      timeout: 3600

# Test entry "many_tasks" (team: core), flagged long_running.
# NOTE(review): nesting reconstructed from a flattened listing; timeouts are
# presumably in seconds (86400 = 24 h, 3600 = 1 h) — confirm against the runner.
- name: many_tasks
  team: core
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml

  run:
    timeout: 86400
    prepare: ray stop
    script: python workloads/many_tasks.py
    long_running: True

  # Presumably overrides run.timeout when executed as a smoke test — confirm.
  smoke_test:
    run:
      timeout: 3600

# Test entry "many_tasks_serialized_ids" (team: core), flagged long_running.
# NOTE(review): nesting reconstructed from a flattened listing; timeouts are
# presumably in seconds (86400 = 24 h, 3600 = 1 h) — confirm against the runner.
- name: many_tasks_serialized_ids
  team: core
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml

  run:
    timeout: 86400
    prepare: ray stop
    script: python workloads/many_tasks_serialized_ids.py
    long_running: True

  # Presumably overrides run.timeout when executed as a smoke test — confirm.
  smoke_test:
    run:
      timeout: 3600


# Test entry "node_failures" (team: core), flagged long_running.
# NOTE(review): nesting reconstructed from a flattened listing; timeouts are
# presumably in seconds (86400 = 24 h, 3600 = 1 h) — confirm against the runner.
- name: node_failures
  team: core
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml

  run:
    timeout: 86400
    prepare: ray stop
    script: python workloads/node_failures.py
    long_running: True

  # Presumably overrides run.timeout when executed as a smoke test — confirm.
  smoke_test:
    run:
      timeout: 3600

# Test entry "pbt" (team: ml), flagged long_running.
# NOTE(review): nesting reconstructed from a flattened listing; timeouts are
# presumably in seconds (86400 = 24 h, 3600 = 1 h) — confirm against the runner.
- name: pbt
  team: ml
  cluster:
    app_config: ../rllib_tests/app_config.yaml
    compute_template: tpl_cpu_1.yaml

  run:
    timeout: 86400
    prepare: ray stop
    script: python workloads/pbt.py
    long_running: True

  # Presumably overrides run.timeout when executed as a smoke test — confirm.
  smoke_test:
    run:
      timeout: 3600

# Test entry "serve" (team: serve), flagged long_running.
# NOTE(review): nesting reconstructed from a flattened listing; timeouts are
# presumably in seconds (86400 = 24 h, 3600 = 1 h) — confirm against the runner.
- name: serve
  team: serve
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml

  run:
    timeout: 86400
    prepare: ray stop
    script: python workloads/serve.py
    long_running: True

  # Presumably overrides run.timeout when executed as a smoke test — confirm.
  smoke_test:
    run:
      timeout: 3600

# Test entry "serve_failure" (team: serve), flagged long_running.
# NOTE(review): nesting reconstructed from a flattened listing; timeouts are
# presumably in seconds (86400 = 24 h, 600 = 10 min) — confirm against the runner.
- name: serve_failure
  team: serve
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_1.yaml

  run:
    timeout: 86400
    prepare: ray stop
    script: python workloads/serve_failure.py
    long_running: True

  # Presumably overrides run.timeout when executed as a smoke test — confirm.
  # NOTE(review): 600 here, whereas every other entry in this list uses 3600 —
  # confirm the shorter smoke-test timeout is intentional.
  smoke_test:
    run:
      timeout: 600