2021-08-24 21:55:27 +02:00
|
|
|
# Heavy learning tests (Atari and HalfCheetah) for major algos.
|
2021-06-01 17:39:18 +02:00
|
|
|
- name: learning_tests
|
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
|
|
|
compute_template: 8gpus_64cpus.yaml
|
|
|
|
|
|
|
|
run:
|
2021-09-16 18:22:23 +02:00
|
|
|
timeout: 14400
|
2021-08-24 21:55:27 +02:00
|
|
|
script: python learning_tests/run.py
|
2021-06-01 17:39:18 +02:00
|
|
|
|
2021-09-01 21:46:06 +02:00
|
|
|
smoke_test:
|
|
|
|
run:
|
2021-09-16 18:22:23 +02:00
|
|
|
timeout: 1200
|
2021-09-01 21:46:06 +02:00
|
|
|
|
2021-08-24 21:55:27 +02:00
|
|
|
# 2-GPU learning tests (CartPole and RepeatAfterMeEnv) for major algos.
|
2021-08-18 17:21:01 +02:00
|
|
|
- name: multi_gpu_learning_tests
|
2021-06-01 17:39:18 +02:00
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
2021-08-24 21:55:27 +02:00
|
|
|
compute_template: 8gpus_96cpus.yaml
|
2021-06-01 17:39:18 +02:00
|
|
|
|
|
|
|
run:
|
2021-08-24 21:55:27 +02:00
|
|
|
timeout: 7200
|
|
|
|
script: python multi_gpu_learning_tests/run.py
|
|
|
|
|
2021-09-06 17:48:05 +02:00
|
|
|
# 2-GPU learning tests (StatelessCartPole) + use_lstm=True for major algos
|
2021-08-24 21:55:27 +02:00
|
|
|
# (that support RNN models).
|
|
|
|
- name: multi_gpu_with_lstm_learning_tests
|
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
|
|
|
compute_template: 8gpus_96cpus.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
timeout: 7200
|
|
|
|
script: python multi_gpu_with_lstm_learning_tests/run.py
|
2021-08-18 17:21:01 +02:00
|
|
|
|
2021-09-06 17:48:05 +02:00
|
|
|
# 2-GPU learning tests (StatelessCartPole) + use_attention=True for major
|
|
|
|
# algos (that support RNN models).
|
2021-09-08 14:30:37 -07:00
|
|
|
- name: multi_gpu_with_attention_learning_tests
|
2021-09-06 17:48:05 +02:00
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
|
|
|
compute_template: 8gpus_96cpus.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
timeout: 7200
|
|
|
|
script: python multi_gpu_with_attention_learning_tests/run.py
|
|
|
|
|
2021-08-18 17:21:01 +02:00
|
|
|
# We'll have these as per-PR tests soon.
|
|
|
|
# - name: example_scripts_on_gpu_tests
|
|
|
|
# cluster:
|
|
|
|
# app_config: app_config.yaml
|
|
|
|
# compute_template: 1gpu_4cpus.yaml
|
|
|
|
|
|
|
|
# run:
|
|
|
|
# timeout: 7200
|
|
|
|
# script: bash unit_gpu_tests/run.sh
|
2021-06-01 17:39:18 +02:00
|
|
|
|
2021-09-16 18:22:23 +02:00
|
|
|
# IMPALA large-machine stress tests (4x Atari).
|
2021-06-01 17:39:18 +02:00
|
|
|
- name: stress_tests
|
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
|
|
|
compute_template: 4gpus_544_cpus.yaml
|
|
|
|
|
|
|
|
run:
|
2021-08-03 17:34:27 +01:00
|
|
|
timeout: 5400
|
2021-09-14 19:01:22 +01:00
|
|
|
prepare: python wait_cluster.py 6 600
|
2021-08-03 17:34:27 +01:00
|
|
|
script: python stress_tests/run_stress_tests.py
|
2021-09-01 21:46:06 +02:00
|
|
|
|
|
|
|
smoke_test:
|
|
|
|
run:
|
2021-09-22 15:30:42 +02:00
|
|
|
timeout: 2000
|
2021-11-03 17:04:27 -07:00
|
|
|
|
|
|
|
# Tests that exercise auto-scaling and Anyscale connect.
|
|
|
|
- name: connect_tests
|
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
|
|
|
compute_template: auto_scale.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
use_connect: True
|
|
|
|
timeout: 3000
|
|
|
|
script: python connect_tests/run_connect_tests.py
|
2021-11-08 09:15:13 -08:00
|
|
|
|
|
|
|
# Nightly performance regression for popular algorithms.
|
|
|
|
# These algorithms run nightly for a pre-determined amount of time without
|
|
|
|
# passing criteria.
|
|
|
|
# Performance metrics, such as reward achieved and throughput, are then
|
|
|
|
# collected and tracked over time.
|
|
|
|
- name: performance_tests
|
|
|
|
cluster:
|
|
|
|
app_config: app_config.yaml
|
|
|
|
compute_template: 12gpus_192cpus.yaml
|
|
|
|
|
|
|
|
run:
|
|
|
|
timeout: 7200
|
|
|
|
script: python performance_tests/run.py
|