ray/release/ml_user_tests/horovod/app_config.yaml
matthewdeng 5c6b91d375
[Release] fix Horovod release tests (#25873)
Error message suggests:

Wait timeout after 30 seconds for key(s): 0. You may want to increase the timeout via HOROVOD_GLOO_TIMEOUT_SECONDS

Bumped up to 120 seconds.

Tests run successfully: https://buildkite.com/ray-project/release-tests-pr/builds/6906
2022-06-17 14:52:54 +01:00

17 lines
652 B
YAML

# App config for the Horovod release tests.
#
# NOTE(review): this file contains unquoted `{{ ... }}` expressions, so it is
# presumably rendered by a Jinja-style template step BEFORE being parsed as
# YAML. The expressions are intentionally left unquoted: the rendered values
# (e.g. the sanity-check command) may contain quotes of their own.

# Base image; overridable via RAY_IMAGE_ML_NIGHTLY_GPU.
base_image: {{ env["RAY_IMAGE_ML_NIGHTLY_GPU"] | default("anyscale/ray-ml:nightly-py37-gpu") }}

env_vars:
  # Raised to 120 s after the tests failed with
  # "Wait timeout after 30 seconds for key(s): 0".
  # Kept as a quoted string so it is not parsed as an integer.
  HOROVOD_GLOO_TIMEOUT_SECONDS: "120"

debian_packages:
  - curl

python:
  pip_packages:
    - pytest
    - awscli
  conda_packages: []

# Shell commands listed in order: Ray first, then the libraries that
# depend on it, then the final sanity check.
post_build_cmds:
  # Remove any preinstalled Ray (failure is ignored via `|| true`), then
  # install the wheel named by RAY_WHEELS, falling back to the "ray" package.
  - pip3 uninstall ray -y || true && pip3 install -U {{ env["RAY_WHEELS"] | default("ray") }}
  - pip3 install 'ray[tune]'
  - pip3 install torch torchvision
  # Install Horovod with the Gloo and PyTorch flags set and the MPI,
  # TensorFlow and MXNet flags disabled. Folded scalar: the lines below are
  # joined with single spaces into one command.
  - >-
    HOROVOD_WITH_GLOO=1 HOROVOD_WITHOUT_MPI=1 HOROVOD_WITHOUT_TENSORFLOW=1
    HOROVOD_WITHOUT_MXNET=1 HOROVOD_WITH_PYTORCH=1
    pip3 install -U horovod
  # Optional wheel sanity check supplied via RAY_WHEELS_SANITY_CHECK;
  # defaults to a no-op echo.
  - {{ env["RAY_WHEELS_SANITY_CHECK"] | default("echo No Ray wheels sanity check") }}