# Test definitions. Each list entry names a test and its owning team, selects
# the cluster configuration files (app_config, compute_template), and gives
# the run settings: a timeout, an optional prepare command, and the script.
# NOTE(review): timeout values are presumably seconds, and the two arguments
# to wait_cluster.py presumably a node count and a wait limit — confirm
# against the runner and wait_cluster.py.

- name: inference
  team: core
  cluster:
    app_config: app_config.yaml
    compute_template: inference.yaml
  run:
    timeout: 600
    prepare: python wait_cluster.py 2 600
    script: python inference.py

# No prepare step is defined for this test.
- name: shuffle_data_loader
  team: core
  cluster:
    app_config: shuffle_app_config.yaml
    compute_template: shuffle_compute.yaml
  run:
    timeout: 1800
    script: python dataset_shuffle_data_loader.py

# Shares its app_config and compute_template with pipelined_training_50_gb.
- name: parquet_metadata_resolution
  team: core
  cluster:
    app_config: pipelined_training_app.yaml
    compute_template: pipelined_training_compute.yaml
  run:
    timeout: 1200
    prepare: python wait_cluster.py 15 1200
    script: python parquet_metadata_resolution.py --num-files 915

- name: pipelined_training_50_gb
  team: core
  cluster:
    app_config: pipelined_training_app.yaml
    compute_template: pipelined_training_compute.yaml
  run:
    timeout: 4800
    prepare: python wait_cluster.py 15 1200
    script: python pipelined_training.py --epochs 1

# Runs the same script as pipelined_training_50_gb, with different arguments
# and its own ingestion-specific cluster configuration.
- name: pipelined_ingestion_1500_gb
  team: core
  cluster:
    app_config: pipelined_ingestion_app.yaml
    compute_template: pipelined_ingestion_compute.yaml
  run:
    timeout: 9600
    prepare: python wait_cluster.py 21 2400
    script: python pipelined_training.py --epochs 2 --num-windows 2 --num-files 915 --debug

- name: datasets_ingest_train_infer
  team: core
  cluster:
    app_config: ray_sgd_training_app.yaml
    compute_template: ray_sgd_training_compute.yaml
  run:
    timeout: 14400
    prepare: python wait_cluster.py 66 2400
    script: python ray_sgd_training.py --address auto --use-s3 --num-workers 16 --use-gpu --large-dataset
  # Variant with its own cluster and run settings: a separate compute
  # template, 8 workers instead of 16, a shorter timeout, and no
  # --large-dataset flag.
  # NOTE(review): presumably used in place of the settings above for smoke
  # runs — confirm against the runner.
  smoke_test:
    cluster:
      app_config: ray_sgd_training_app.yaml
      compute_template: ray_sgd_training_smoke_compute.yaml
    run:
      timeout: 3600
      prepare: python wait_cluster.py 8 2400
      script: python ray_sgd_training.py --address auto --use-s3 --num-workers 8 --use-gpu

# NOTE(review): the script is passed --use-gpu while the compute template is
# named *_no_gpu — confirm this combination is intentional.
- name: datasets_preprocess_ingest
  team: core
  cluster:
    app_config: ray_sgd_training_app.yaml
    compute_template: ray_sgd_training_compute_no_gpu.yaml
  run:
    timeout: 7200
    prepare: python wait_cluster.py 21 2400
    script: python ray_sgd_training.py --address auto --use-s3 --num-workers 16 --use-gpu --large-dataset --debug

# No prepare step is defined for this test.
- name: datasets_ingest_400G
  team: core
  cluster:
    app_config: ray_sgd_training_app.yaml
    compute_template: dataset_ingest_400G_compute.yaml
  run:
    timeout: 7200
    script: python ray_sgd_runner.py --address auto --use-gpu --num-epochs 1