2021-10-29 12:12:01 +02:00
|
|
|
- name: aws_no_sync_down
|
2022-01-20 06:29:53 +09:00
|
|
|
team: ml
|
2021-10-29 12:12:01 +02:00
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
|
|
|
compute_template: tpl_aws_4x2.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
timeout: 600
|
|
|
|
prepare: python wait_cluster.py 4 600
|
|
|
|
script: python workloads/run_cloud_test.py no_sync_down
|
|
|
|
|
|
|
|
- name: aws_ssh_sync
|
2022-01-20 06:29:53 +09:00
|
|
|
team: ml
|
2021-10-29 12:12:01 +02:00
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
|
|
|
compute_template: tpl_aws_4x2.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
timeout: 600
|
|
|
|
prepare: python wait_cluster.py 4 600
|
|
|
|
script: python workloads/run_cloud_test.py ssh_sync
|
|
|
|
|
|
|
|
- name: aws_durable_upload
|
2022-01-20 06:29:53 +09:00
|
|
|
team: ml
|
2021-10-29 12:12:01 +02:00
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
|
|
|
compute_template: tpl_aws_4x2.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
timeout: 600
|
|
|
|
prepare: python wait_cluster.py 4 600
|
|
|
|
script: python workloads/run_cloud_test.py durable_upload --bucket s3://data-test-ilr/durable_upload
|
|
|
|
|
2021-11-16 20:52:42 +00:00
|
|
|
- name: aws_durable_upload_rllib_str
|
2022-01-20 06:29:53 +09:00
|
|
|
team: ml
|
2021-11-16 20:52:42 +00:00
|
|
|
cluster:
|
|
|
|
app_config: app_config_ml.yaml
|
|
|
|
compute_template: tpl_aws_4x2.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
timeout: 600
|
|
|
|
prepare: python wait_cluster.py 4 600
|
|
|
|
script: python workloads/run_cloud_test.py durable_upload --trainable rllib_str --bucket s3://data-test-ilr/durable_upload_rllib_str
|
|
|
|
|
|
|
|
- name: aws_durable_upload_rllib_trainer
|
2022-01-20 06:29:53 +09:00
|
|
|
team: ml
|
2021-11-16 20:52:42 +00:00
|
|
|
cluster:
|
|
|
|
app_config: app_config_ml.yaml
|
|
|
|
compute_template: tpl_aws_4x2.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
timeout: 600
|
|
|
|
prepare: python wait_cluster.py 4 600
|
|
|
|
script: python workloads/run_cloud_test.py durable_upload --trainable rllib_trainer --bucket s3://data-test-ilr/durable_upload_rllib_trainer
|
|
|
|
|
2021-10-29 12:12:01 +02:00
|
|
|
- name: aws_no_durable_upload
|
2022-01-20 06:29:53 +09:00
|
|
|
team: ml
|
2021-10-29 12:12:01 +02:00
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
|
|
|
compute_template: tpl_aws_4x2.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
timeout: 600
|
|
|
|
prepare: python wait_cluster.py 4 600
|
|
|
|
script: python workloads/run_cloud_test.py no_durable_upload --bucket s3://data-test-ilr/durable_upload
|
|
|
|
|
|
|
|
- name: gcp_k8s_no_sync_down
|
2022-01-20 06:29:53 +09:00
|
|
|
team: ml
|
2021-10-29 12:12:01 +02:00
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
|
|
|
compute_template: tpl_gcp_k8s_4x8.yaml
|
|
|
|
cloud_id: cld_k8WcxPgjUtSE8RVmfZpTLuKM # anyscale_k8s_gcp_cloud
|
|
|
|
|
|
|
|
run:
|
|
|
|
use_connect: True
|
|
|
|
timeout: 600
|
|
|
|
# Remove --cpus-per-trial 8 once n2-standard-2 is supported
|
|
|
|
script: python workloads/run_cloud_test.py no_sync_down --cpus-per-trial 8
|
|
|
|
|
|
|
|
- name: gcp_k8s_ssh_sync
|
2022-01-20 06:29:53 +09:00
|
|
|
team: ml
|
2021-10-29 12:12:01 +02:00
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
|
|
|
compute_template: tpl_gcp_k8s_4x8.yaml
|
|
|
|
cloud_id: cld_k8WcxPgjUtSE8RVmfZpTLuKM # anyscale_k8s_gcp_cloud
|
|
|
|
|
|
|
|
run:
|
|
|
|
use_connect: True
|
|
|
|
timeout: 600
|
|
|
|
# Remove --cpus-per-trial 8 once n2-standard-2 is supported
|
|
|
|
script: python workloads/run_cloud_test.py ssh_sync --cpus-per-trial 8
|
|
|
|
|
|
|
|
- name: gcp_k8s_durable_upload
|
2022-01-20 06:29:53 +09:00
|
|
|
team: ml
|
2021-10-29 12:12:01 +02:00
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
|
|
|
compute_template: tpl_gcp_k8s_4x8.yaml
|
|
|
|
cloud_id: cld_k8WcxPgjUtSE8RVmfZpTLuKM # anyscale_k8s_gcp_cloud
|
|
|
|
|
|
|
|
run:
|
|
|
|
use_connect: True
|
|
|
|
timeout: 600
|
|
|
|
# Remove --cpus-per-trial 8 once n2-standard-2 is supported
|
|
|
|
script: python workloads/run_cloud_test.py durable_upload --cpus-per-trial 8 --bucket gs://jun-riot-test/durable_upload
|
|
|
|
|
|
|
|
|
|
|
|
- name: gcp_k8s_no_durable_upload
|
2022-01-20 06:29:53 +09:00
|
|
|
team: ml
|
2021-10-29 12:12:01 +02:00
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
|
|
|
compute_template: tpl_gcp_k8s_4x8.yaml
|
|
|
|
cloud_id: cld_k8WcxPgjUtSE8RVmfZpTLuKM # anyscale_k8s_gcp_cloud
|
|
|
|
|
|
|
|
run:
|
|
|
|
use_connect: True
|
|
|
|
timeout: 600
|
|
|
|
# Remove --cpus-per-trial 8 once n2-standard-2 is supported
|
|
|
|
script: python workloads/run_cloud_test.py no_durable_upload --cpus-per-trial 8 --bucket gs://jun-riot-test/durable_upload
|