ray/doc/azure/azure-init.sh
mehrdadn b14728d999
Shellcheck quoting (#9596)
* Fix SC2006: Use $(...) notation instead of legacy backticked `...`.

* Fix SC2016: Expressions don't expand in single quotes, use double quotes for that.

* Fix SC2046: Quote this to prevent word splitting.

* Fix SC2053: Quote the right-hand side of == in [[ ]] to prevent glob matching.

* Fix SC2068: Double quote array expansions to avoid re-splitting elements.

* Fix SC2086: Double quote to prevent globbing and word splitting.

* Fix SC2102: Ranges can only match single chars (mentioned due to duplicates).

* Fix SC2140: Word is of the form "A"B"C" (B indicated). Did you mean "ABC" or "A\"B\"C"?

* Fix SC2145: Argument mixes string and array. Use * or separate argument.

* Fix SC2209: warning: Use var=$(command) to assign output (or quote to assign string).

Co-authored-by: Mehrdad <noreply@github.com>
2020-07-21 21:56:41 -05:00

94 lines
No EOL
1.9 KiB
Bash
Executable file

#!/bin/sh
# Positional arguments:
#   $1  USERNAME     user the install commands are run as (via sudo -u)
#   $2  CONDA_ENV    conda environment activated before installing the wheel
#   $3  WHEEL        argument handed to `pip install`
#   $4  RAY_HEAD_IP  Ray head address (consumed further down the script)
#   $5  TYPE         node type (consumed further down the script)
USERNAME="$1"
CONDA_ENV="$2"
WHEEL="$3"
RAY_HEAD_IP="$4"
TYPE="$5"

# Run a single command string as $USERNAME inside a login bash shell
# (sudo -i together with bash -l).
as_user() {
  sudo -u "$USERNAME" -i /bin/bash -l -c "$1"
}

printf '%s\n' "Installing wheel..."
as_user "conda init bash"
# The environment name and wheel are expanded here, by this script, before the
# command string is handed to the user's shell.
as_user "conda activate $CONDA_ENV; pip install $WHEEL"
printf '%s\n' "Setting up service scripts..."

# All launcher scripts are written into the user's home directory.
launcher_dir="/home/$USERNAME"

# Head launcher. Escaped sequences (\` and \$) are written out literally so
# they are evaluated when the launcher runs; unescaped ${...} expansions are
# filled in now, while the file is being generated.
cat > "$launcher_dir/ray-head.sh" << HEAD_SCRIPT
#!/bin/bash
conda activate ${CONDA_ENV}
NUM_GPUS=\`nvidia-smi -L | wc -l\`
ray stop
ulimit -n 65536
ray start --head --redis-port=6379 --object-manager-port=8076 --num-gpus=\$NUM_GPUS --block --webui-host 0.0.0.0
HEAD_SCRIPT

# Worker launcher. Connects to the head at RAY_HEAD_IP:6379 and restarts
# `ray start` in an endless loop whenever it returns.
cat > "$launcher_dir/ray-worker.sh" << WORKER_SCRIPT
#!/bin/bash
conda activate ${CONDA_ENV}
NUM_GPUS=\`nvidia-smi -L | wc -l\`
ray stop
ulimit -n 65536
while true
do
ray start --address=${RAY_HEAD_IP}:6379 --object-manager-port=8076 --num-gpus=\$NUM_GPUS --block
echo Ray exited. Auto-restarting in 1 second...
sleep 1
done
WORKER_SCRIPT

# TensorBoard launcher. Serves the ray_results directory under the user's home.
cat > "$launcher_dir/tensorboard.sh" << TENSORBOARD_SCRIPT
#!/bin/bash
conda activate ${CONDA_ENV}
mkdir -p ${launcher_dir}/ray_results
tensorboard --bind_all --logdir=${launcher_dir}/ray_results
TENSORBOARD_SCRIPT

# Mark every generated launcher as executable.
for launcher in ray-head.sh ray-worker.sh tensorboard.sh; do
  chmod +x "$launcher_dir/$launcher"
done
# Write a simple systemd service unit that runs one of the launcher scripts
# from the user's home directory as $USERNAME, through a login bash shell.
#   $1  unit name (file becomes /lib/systemd/system/<name>.service)
#   $2  text for the unit's Description= field
#   $3  launcher script file name inside /home/$USERNAME
write_service_unit() {
  cat > "/lib/systemd/system/$1.service" << UNIT
[Unit]
Description=$2
[Service]
Type=simple
User=$USERNAME
ExecStart=/bin/bash -l /home/$USERNAME/$3
[Install]
WantedBy=multi-user.target
UNIT
}

# The ray unit runs ray-<TYPE>.sh, so the node type picks the launcher.
write_service_unit ray Ray "ray-$TYPE.sh"
write_service_unit tensorboard TensorBoard tensorboard.sh
# Register the ray unit so it starts at boot, then start it right away.
echo "Configure ray to start at boot..."
systemctl enable ray
echo "Starting ray..."
systemctl start ray

# TensorBoard is enabled and started only when the node type is "head".
# The node type is stored in TYPE (assigned from $5 at the top of the script).
# Shell variable names are case-sensitive, so testing the never-assigned
# lowercase "type" made this branch unreachable and TensorBoard never started.
if [ "$TYPE" = "head" ]; then
  echo "Configure TensorBoard to start at boot..."
  systemctl enable tensorboard
  echo "Starting TensorBoard..."
  systemctl start tensorboard
fi