# Test definitions.
#
# Each list entry describes one test:
#   name    - identifier of the test.
#   cluster - files describing the cluster to run on:
#               app_config        application config file
#               compute_template  compute template file
#   run     - how the workload is executed:
#               timeout  numeric limit for the run (units not stated here;
#                        presumably seconds - confirm against the runner)
#               prepare  command run before the workload; every entry calls
#                        wait_cluster.py with two numeric arguments
#                        (presumably node count and wait timeout - confirm)
#               script   command that runs the workload itself
#               results  optional; set to "" on some entries
#             train_small additionally sets use_connect and autosuspend_mins.

- name: train_small
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_small.yaml

  run:
    use_connect: true
    autosuspend_mins: 10
    timeout: 600
    prepare: python wait_cluster.py 4 600
    script: python workloads/train_small.py

- name: train_moderate
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_moderate.yaml

  run:
    timeout: 600
    prepare: python wait_cluster.py 32 600
    script: python workloads/train_moderate.py

- name: train_gpu
  cluster:
    app_config: app_config_gpu.yaml
    compute_template: tpl_gpu_small.yaml

  run:
    timeout: 600
    prepare: python wait_cluster.py 5 600
    script: python workloads/train_gpu.py

- name: distributed_api_test
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_small.yaml
    # NOTE(review): this key had no value in the original. It is kept as an
    # explicit null so the parsed data is unchanged; its placement under
    # `cluster` is inferred from key order. It looks like a leftover of
    # `run.results` below - confirm and remove if unused.
    results: null

  run:
    timeout: 600
    prepare: python wait_cluster.py 4 600
    script: python workloads/distributed_api_test.py
    results: ""

- name: ft_small_elastic
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_small.yaml

  run:
    timeout: 900
    prepare: python wait_cluster.py 4 600
    script: python workloads/ft_small_elastic.py
    results: ""

- name: ft_small_non_elastic
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_small.yaml

  run:
    timeout: 900
    prepare: python wait_cluster.py 4 600
    script: python workloads/ft_small_non_elastic.py
    results: ""

- name: tune_small
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_small.yaml

  run:
    timeout: 600
    prepare: python wait_cluster.py 4 600
    script: python workloads/tune_small.py

- name: tune_32x4
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_moderate.yaml

  run:
    timeout: 900
    prepare: python wait_cluster.py 32 600
    script: python workloads/tune_32x4.py

- name: tune_4x32
  cluster:
    app_config: app_config.yaml
    compute_template: tpl_cpu_moderate.yaml

  run:
    timeout: 900
    prepare: python wait_cluster.py 32 600
    script: python workloads/tune_4x32.py