[train/release] Fix horovod user test master app config (#24734)

This commit is contained in:
Kai Fricke 2022-05-15 05:20:45 +01:00 committed by GitHub
parent 9381dd174e
commit de69b0d6d6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 19 additions and 1 deletion

View file

@ -0,0 +1,18 @@
# Cluster environment definition: base image, system packages, Python
# packages, and shell commands executed after the image is built.
# NOTE(review): the `{{ ... }}` expressions look like Jinja placeholders that
# are rendered before this file is parsed as YAML — confirm with the consumer.
base_image: "anyscale/ray-ml:nightly-py37-gpu"
env_vars: {}
debian_packages:
  - curl

# pip_packages and conda_packages belong to the `python` mapping; without the
# indentation `python` would be null and both lists would become top-level keys.
python:
  pip_packages:
    - pytest
    - awscli
  conda_packages: []

post_build_cmds:
  # Drop any ray already present in the image; `|| true` keeps the command
  # from failing when the uninstall returns a non-zero status.
  - pip3 uninstall ray -y || true
  # Installs the value of RAY_WHEELS when set, otherwise falls back to "ray".
  - pip3 install -U {{ env["RAY_WHEELS"] | default("ray") }}
  - pip3 install 'ray[tune]'
  - pip3 install torch torchvision
  # Installs Horovod from the upstream Git repository. Per their names, the
  # HOROVOD_* variables enable Gloo and PyTorch and disable MPI, TensorFlow
  # and MXNet for the build.
  - HOROVOD_WITH_GLOO=1 HOROVOD_WITHOUT_MPI=1 HOROVOD_WITHOUT_TENSORFLOW=1 HOROVOD_WITHOUT_MXNET=1 HOROVOD_WITH_PYTORCH=1 pip3 install -U git+https://github.com/horovod/horovod.git
  # Runs RAY_WHEELS_SANITY_CHECK when set, otherwise a no-op echo.
  # Intentionally left unquoted: the substituted command may itself contain
  # quotes (TODO confirm), so wrapping the placeholder could break the
  # rendered YAML. Unrendered, a leading `{{` is read as a flow mapping.
  - {{ env["RAY_WHEELS_SANITY_CHECK"] | default("echo No Ray wheels sanity check") }}

View file

@ -593,7 +593,7 @@
team: ml team: ml
cluster: cluster:
cluster_env: ../horovod_tests/app_config_master.yaml cluster_env: horovod/app_config_master.yaml
cluster_compute: horovod/compute_tpl.yaml cluster_compute: horovod/compute_tpl.yaml
driver_setup: horovod/driver_setup_master.sh driver_setup: horovod/driver_setup_master.sh