mirror of
https://github.com/vale981/ray
synced 2025-03-08 11:31:40 -05:00

* add copy * upgrade to tf 1.14 * update * reduce count to workaround https://github.com/ray-project/ray/issues/5125 * Update impala.py * placeholder * comments * update
74 lines
2.1 KiB
Bash
Executable file
74 lines
2.1 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# Brings up one Ray cluster per file in the workloads/ directory that sits next
# to this script, starts each workload on its cluster, and finally prints the
# commands needed to attach to, inspect, and shut down those clusters.

set -e

# Absolute path of the directory containing this script. Using `&&` instead of
# `;` means a failed `cd` makes the substitution fail (and `set -e` abort the
# script) rather than silently yielding the caller's working directory.
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)
pushd "$ROOT_DIR"

# Cluster config file, resolved relative to ROOT_DIR (the current directory
# after the pushd above).
CLUSTER_CONFIG="config.yaml"

# Refuse to run while the config still contains the RAY_WHEEL_TO_TEST_HERE
# placeholder; it has to be replaced by hand before launching anything.
if grep -q -- RAY_WHEEL_TO_TEST_HERE "$CLUSTER_CONFIG"; then
  echo "You must replace the RAY_WHEEL_TO_TEST_HERE string in $CLUSTER_CONFIG." >&2
  exit 1
fi

# Start one instance per workload. Every `ray up` runs in the background so the
# clusters come up in parallel; each PID is recorded so that the exit status of
# every job can be checked individually below.
up_pids=()
for workload_file in "$ROOT_DIR"/workloads/*; do
  file_name=$(basename -- "$workload_file")
  # Cluster name is the workload file name without its last extension.
  workload_name="${file_name%.*}"
  ray up -y "$CLUSTER_CONFIG" --cluster-name="$workload_name" &
  up_pids+=("$!")
done

# Wait for all of the nodes to be up. A bare `wait` returns 0 regardless of how
# the jobs exited, so wait on each PID and remember the last non-zero status.
# The `||` also keeps `set -e` from aborting before every job has been reaped.
status=0
for pid in "${up_pids[@]}"; do
  wait "$pid" || status=$?
done

if [ "$status" != 0 ]; then
  echo "Some update processes failed with $status" >&2
  exit 1
fi

# Start the workloads running. Each workload is handled in its own background
# subshell so the clusters are set up in parallel; the subshell PIDs are kept so
# failures can be reported once all of them have finished.
run_pids=()
for workload_file in "$ROOT_DIR"/workloads/*; do
  file_name=$(basename -- "$workload_file")
  # Cluster name is the workload file name without its last extension.
  workload_name="${file_name%.*}"
  (
    # Copy the workload to the cluster.
    ray rsync_up "$CLUSTER_CONFIG" --cluster-name="$workload_name" "$workload_file" "$file_name"
    # Clean up previous runs if relevant. `|| true` makes the remote command
    # succeed even when tmux kill-server itself fails.
    ray exec "$CLUSTER_CONFIG" --cluster-name="$workload_name" "source activate tensorflow_p36 && ray stop; rm -r /tmp/ray; tmux kill-server || true"
    # Start the workload.
    ray exec "$CLUSTER_CONFIG" --cluster-name="$workload_name" "source activate tensorflow_p36 && python $file_name" --tmux
  ) &
  run_pids+=("$!")
done

# Wait for child processes to finish. A bare `wait` would discard their exit
# statuses, so wait per PID and count the failures. The script deliberately
# keeps going afterwards: the commands printed at the end are still needed to
# inspect or shut down whatever was started.
failed=0
for pid in "${run_pids[@]}"; do
  wait "$pid" || failed=$((failed + 1))
done

if [ "$failed" -ne 0 ]; then
  echo "Warning: $failed workload launch(es) failed; see the output above." >&2
fi

# Return to the directory the script was invoked from.
popd

# Print some helpful information: one attach command per workload cluster,
# followed by the shutdown and status-check helper scripts.
printf '%s\n' \
  "" \
  "" \
  "Use the following commands to attach to the relevant drivers." \
  ""

for workload_path in "$ROOT_DIR"/workloads/*; do
  # Strip the directory part, then the last extension, to get the cluster name.
  script_name=${workload_path##*/}
  cluster_name=${script_name%.*}
  printf ' ray attach %s/%s --cluster-name=%s --tmux\n' \
    "$ROOT_DIR" "$CLUSTER_CONFIG" "$cluster_name"
done

printf '%s\n' \
  "" \
  "" \
  "To shut down all instances, run the following." \
  " $ROOT_DIR/shut_down_workloads.sh" \
  "" \
  "" \
  "To check up on the scripts, run the following." \
  " $ROOT_DIR/check_workloads.sh --load" \
  " $ROOT_DIR/check_workloads.sh --logs"