# ray/release/rllib_tests/rllib_tests.yaml

# Heavy learning tests (Atari and HalfCheetah) for major algos.
- name: learning_tests
  team: ml
  cluster:
    app_config: app_config.yaml
    compute_template: 8gpus_64cpus.yaml

  run:
    # Limit for the full run (presumably in seconds -- confirm against the
    # release-test runner).
    timeout: 14400
    script: python learning_tests/run.py

  # Smoke-test settings; only the run timeout is specified here.
  # NOTE(review): presumably merged over the `run` section above when the
  # test is executed as a smoke test -- confirm.
  smoke_test:
    run:
      timeout: 1200

# 2-GPU learning tests (CartPole and RepeatAfterMeEnv) for major algos.
- name: multi_gpu_learning_tests
  team: ml
  cluster:
    app_config: app_config.yaml
    # NOTE(review): the template name suggests an 8-GPU cluster although this
    # entry is described as 2-GPU tests -- presumably several trials share
    # the cluster; confirm.
    compute_template: 8gpus_96cpus.yaml

  run:
    # Limit for the run (presumably in seconds -- confirm).
    timeout: 7200
    script: python multi_gpu_learning_tests/run.py

# 2-GPU learning tests (StatelessCartPole) + use_lstm=True for major algos
# (that support RNN models).
- name: multi_gpu_with_lstm_learning_tests
  team: ml
  cluster:
    app_config: app_config.yaml
    # Same compute template as the other multi-GPU learning test entries.
    compute_template: 8gpus_96cpus.yaml

  run:
    # Limit for the run (presumably in seconds -- confirm).
    timeout: 7200
    script: python multi_gpu_with_lstm_learning_tests/run.py

# 2-GPU learning tests (StatelessCartPole) + use_attention=True for major
# algos (that support RNN models).
- name: multi_gpu_with_attention_learning_tests
  team: ml
  cluster:
    app_config: app_config.yaml
    # Same compute template as the other multi-GPU learning test entries.
    compute_template: 8gpus_96cpus.yaml

  run:
    # Limit for the run (presumably in seconds -- confirm).
    timeout: 7200
    script: python multi_gpu_with_attention_learning_tests/run.py

# We'll have these as per-PR tests soon.
# - name: example_scripts_on_gpu_tests
#   team: ml
#   cluster:
#     app_config: app_config.yaml
#     compute_template: 1gpu_4cpus.yaml
#
#   run:
#     timeout: 7200
#     script: bash unit_gpu_tests/run.sh

# IMPALA large machine stress tests (4x Atari).
- name: stress_tests
  team: ml
  cluster:
    app_config: app_config.yaml
    compute_template: 4gpus_544_cpus.yaml

  run:
    timeout: 5400
    # NOTE(review): presumably executed before `script`, and the arguments
    # presumably mean "wait for 6 nodes, for up to 600 seconds" -- confirm
    # in wait_cluster.py.
    prepare: python wait_cluster.py 6 600
    script: python stress_tests/run_stress_tests.py

  # Smoke-test settings; only the run timeout is specified here.
  smoke_test:
    run:
      timeout: 2000

# Tests that exercise auto-scaling and Anyscale connect.
- name: connect_tests
  team: ml
  cluster:
    app_config: app_config.yaml
    compute_template: auto_scale.yaml

  run:
    # Boolean flag, written in canonical lowercase form.
    # NOTE(review): presumably makes the runner execute the script through
    # Anyscale connect -- confirm against the release-test runner.
    use_connect: true
    timeout: 3000
    script: python connect_tests/run_connect_tests.py

# Nightly performance regression for popular algorithms.
# These algorithms run nightly for a predetermined amount of time, with no
# pass/fail criteria.
# Performance metrics, such as reward achieved and throughput, are then
# collected and tracked over time.
- name: performance_tests
  team: ml
  cluster:
    app_config: app_config.yaml
    compute_template: 12gpus_192cpus.yaml

  run:
    # Limit for the run (presumably in seconds -- confirm). Per the section
    # header, these runs have no passing criteria; metrics are collected and
    # tracked over time instead.
    timeout: 7200
    script: python performance_tests/run.py