Improve release process from 0.8.2 (#7303)

Simon Mo 2020-02-24 21:18:53 -08:00 committed by GitHub
parent f2faf8d26e
commit 29b08ddc09
31 changed files with 594 additions and 443 deletions

View file

@ -6,8 +6,8 @@ forever until they fail. To set up the project you need to run
.. code-block:: bash
    pip install any
    any project create
    pip install anyscale
    anyscale project create
Running the Workloads
@ -17,21 +17,21 @@ You can start all the workloads with:
.. code-block:: bash
    any session start -y run --workload="*" --wheel=https://s3-us-west-2.amazonaws.com/ray-wheels/releases/0.7.5/6da7eff4b20340f92d3fe1160df35caa68922a97/ray-0.7.5-cp36-cp36m-manylinux1_x86_64.whl
    anyscale session start -y run --workload="*" --wheel=https://s3-us-west-2.amazonaws.com/ray-wheels/releases/0.7.5/6da7eff4b20340f92d3fe1160df35caa68922a97/ray-0.7.5-cp36-cp36m-manylinux1_x86_64.whl
This will start one EC2 instance per workload and will start the workloads
running (one per instance). You can start a specific workload by specifying
its name as an argument ``--workload=`` instead of ``"*"``. A list of available options
is available via `any session start run --help`.
its name as an argument ``--workload=`` instead of ``"*"``. A list of
available options can be printed via `anyscale session start run --help`.
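For example, to start a single workload (the workload name here is hypothetical,
and ``<wheel-url>`` stands for a release wheel URL like the one above):

.. code-block:: bash

    anyscale session start -y run --workload="many_tasks" --wheel=<wheel-url>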
Check Workload Statuses
-----------------------
To check up on the workloads, run either
``any session --name="*" execute check-load``, which
``anyscale session --name="*" execute check-load``, which
will print the load on each machine, or
``any session --name="*" execute show-output``, which
``anyscale session --name="*" execute show-output``, which
will print the tail of the output for each workload.
To debug workloads that have failed, you may find it useful to ssh to the
@ -43,7 +43,7 @@ Shut Down the Workloads
-----------------------
The instances running the workloads can all be killed by running
``any session stop --name "*"``.
``anyscale session stop --name "*"``.
Adding a Workload
-----------------

View file

@ -57,15 +57,17 @@ for _ in range(5):
time.sleep(0.5)
connections = int(config.num_replicas * config.max_batch_size * 0.75)
proc = subprocess.Popen(
    [
        "./hey_linux_amd64", "-c",
        str(connections), "-z", "360m", "http://127.0.0.1:8000/echo"
    ],
    stdout=PIPE,
    stderr=PIPE)
print("started load testing")
proc.wait()
out, err = proc.communicate()
print(out.decode())
print(err.decode())
while True:
    proc = subprocess.Popen(
        [
            "./hey_linux_amd64", "-c",
            str(connections), "-z", "60m", "http://127.0.0.1:8000/echo"
        ],
        stdout=PIPE,
        stderr=PIPE)
    print("started load testing")
    proc.wait()
    out, err = proc.communicate()
    print(out.decode())
    print(err.decode())

View file

@ -0,0 +1,55 @@
cluster_name: ray-release-microbenchmark

min_workers: 0
max_workers: 0

target_utilization_fraction: 0.8
idle_timeout_minutes: 5

# Cloud-provider specific configuration.
provider:
    type: aws
    region: us-west-2
    availability_zone: us-west-2a

auth:
    ssh_user: ubuntu

head_node:
    InstanceType: m4.16xlarge
    ImageId: ami-06d51e91cea0dac8d  # Ubuntu 18.04
    BlockDeviceMappings:
        - DeviceName: /dev/sda1
          Ebs:
              VolumeSize: 150

worker_nodes:
    InstanceType: m5.large
    ImageId: ami-06d51e91cea0dac8d  # Ubuntu 18.04
    BlockDeviceMappings:
        - DeviceName: /dev/sda1
          Ebs:
              VolumeSize: 150
    # Run workers on spot by default. Comment this out to use on-demand.
    InstanceMarketOptions:
        MarketType: spot

# List of shell commands to run to set up nodes.
setup_commands:
    # Set up tmux and screen defaults for the session.
    - echo set-window-option -g mouse on > ~/.tmux.conf
    - echo 'termcapinfo xterm* ti@:te@' > ~/.screenrc

# Custom commands that will be run on the head node after common setup.
head_setup_commands:
    # Install Anaconda.
    - wget --quiet https://repo.continuum.io/archive/Anaconda3-5.0.1-Linux-x86_64.sh || true
    - bash Anaconda3-5.0.1-Linux-x86_64.sh -b -p $HOME/anaconda3 || true
    - echo 'export PATH="$HOME/anaconda3/bin:$PATH"' >> ~/.bashrc

# Custom commands that will be run on worker nodes after common setup.
worker_setup_commands: []

# Command to start ray on the head node. You don't need to change this.
head_start_ray_commands: []

# Command to start ray on worker nodes. You don't need to change this.
worker_start_ray_commands: []

View file

@ -0,0 +1,39 @@
name: microbenchmark

description: "Ray's microbenchmark"

cluster:
  config: ray-project/cluster.yaml

commands:
  - name: run
    help: "Start one microbenchmark trial."
    command: |
      rm ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl || true
      wget https://s3-us-west-2.amazonaws.com/ray-wheels/{{ray_branch}}/{{commit}}/ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl
      pip uninstall -y -q ray
      pip install -U ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl
      OMP_NUM_THREADS=64 ray microbenchmark
    params:
      - name: ray_version  # Ray version string.
        default: "0.9.0.dev0"
      - name: commit  # Ray commit SHA string.
        default: "FILL ME IN"
      - name: ray_branch
        default: "master"
    config:
      tmux: true

# Pathnames for files and directories that should be saved
# in a snapshot but that should not be synced with a
# session. Pathnames can be relative to the project
# directory or absolute. Generally, these should be files
# that were created by an active session, such as
# application checkpoints and logs.
output_files: [
  # For example, uncomment this to save the logs from the
  # last ray job.
  # "/tmp/ray/session_latest",
]
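For reference, a release run of this project might be launched as follows. The
flags mirror the parameters above, using the same ``--name=value`` pattern as the
other release projects; the branch and commit values are placeholders:

.. code-block:: bash

    anyscale session start -y run --ray_version="0.8.2" --ray_branch="releases/0.8.2" --commit=<release-commit>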

View file

@ -0,0 +1,145 @@
# This file runs on a single g3.16xl or p3.16xl node. It is suggested
# to run these in a DLAMI / tensorflow_p36 env. Note that RL runs are
# inherently high variance, so you'll have to check to see if the
# rewards reached seem reasonably in line with previous results.
#
# You can find the reference results here:
# https://github.com/ray-project/ray/tree/master/doc/dev/release_logs
atari-impala:
    env: BreakoutNoFrameskip-v4
    run: IMPALA
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        sample_batch_size: 50
        train_batch_size: 500
        num_workers: 10
        num_envs_per_worker: 5
        clip_rewards: True
        lr_schedule: [
            [0, 0.0005],
            [20000000, 0.000000000001],
        ]
        num_gpus: 1
atari-ppo-tf:
    env: BreakoutNoFrameskip-v4
    run: PPO
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        lambda: 0.95
        kl_coeff: 0.5
        clip_rewards: True
        clip_param: 0.1
        vf_clip_param: 10.0
        entropy_coeff: 0.01
        train_batch_size: 5000
        sample_batch_size: 100
        sgd_minibatch_size: 500
        num_sgd_iter: 10
        num_workers: 10
        num_envs_per_worker: 5
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        vf_share_layers: true
        num_gpus: 1
atari-ppo-torch:
    env: BreakoutNoFrameskip-v4
    run: PPO
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        use_pytorch: true
        lambda: 0.95
        kl_coeff: 0.5
        clip_rewards: True
        clip_param: 0.1
        vf_clip_param: 10.0
        entropy_coeff: 0.01
        train_batch_size: 5000
        sample_batch_size: 100
        sgd_minibatch_size: 500
        num_sgd_iter: 10
        num_workers: 10
        num_envs_per_worker: 5
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        vf_share_layers: true
        num_gpus: 1
apex:
    env: BreakoutNoFrameskip-v4
    run: APEX
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        double_q: false
        dueling: false
        num_atoms: 1
        noisy: false
        n_step: 3
        lr: .0001
        adam_epsilon: .00015
        hiddens: [512]
        buffer_size: 1000000
        exploration_config:
            epsilon_timesteps: 200000
            final_epsilon: 0.01
        prioritized_replay_alpha: 0.5
        final_prioritized_replay_beta: 1.0
        prioritized_replay_beta_annealing_timesteps: 2000000
        num_gpus: 1
        num_workers: 8
        num_envs_per_worker: 8
        sample_batch_size: 20
        train_batch_size: 512
        target_network_update_freq: 50000
        timesteps_per_iteration: 25000
atari-a2c:
    env: BreakoutNoFrameskip-v4
    run: A2C
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        sample_batch_size: 20
        clip_rewards: True
        num_workers: 5
        num_envs_per_worker: 5
        num_gpus: 1
        lr_schedule: [
            [0, 0.0007],
            [20000000, 0.000000000001],
        ]
atari-basic-dqn:
    env: BreakoutNoFrameskip-v4
    run: DQN
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        double_q: false
        dueling: false
        num_atoms: 1
        noisy: false
        prioritized_replay: false
        n_step: 1
        target_network_update_freq: 8000
        lr: .0000625
        adam_epsilon: .00015
        hiddens: [512]
        learning_starts: 20000
        buffer_size: 1000000
        sample_batch_size: 4
        train_batch_size: 32
        exploration_config:
            epsilon_timesteps: 200000
            final_epsilon: 0.01
        prioritized_replay_alpha: 0.5
        final_prioritized_replay_beta: 1.0
        prioritized_replay_beta_annealing_timesteps: 2000000
        num_gpus: 0.2
        timesteps_per_iteration: 10000

View file

@ -0,0 +1,43 @@
cluster_name: ray-rllib-regression-tests

min_workers: 0
max_workers: 0

# Cloud-provider specific configuration.
provider:
    type: aws
    region: us-west-2
    availability_zone: us-west-2a
    cache_stopped_nodes: False

# How Ray will authenticate with newly launched nodes.
auth:
    ssh_user: ubuntu

head_node:
    InstanceType: p3.16xlarge
    ImageId: ami-07728e9e2742b0662  # Deep Learning AMI (Ubuntu 16.04)
    # Set primary volume to 100 GiB
    BlockDeviceMappings:
        - DeviceName: /dev/sda1
          Ebs:
              VolumeSize: 100

# List of shell commands to run to set up nodes.
setup_commands:
    - wget --quiet https://s3-us-west-2.amazonaws.com/ray-wheels/releases/{{ray_version}}/{{commit}}/ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl
    - source activate tensorflow_p36 && pip install -U ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl
    - source activate tensorflow_p36 && pip install ray[rllib] ray[debug]
    - source activate tensorflow_p36 && pip install boto3==1.4.8 cython==0.29.0

# Command to start ray on the head node. You don't need to change this.
head_start_ray_commands:
    - source activate tensorflow_p36 && ray stop
    - ulimit -n 65536; source activate tensorflow_p36 && OMP_NUM_THREADS=1 ray start --head --redis-port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml

# Command to start ray on worker nodes. You don't need to change this.
worker_start_ray_commands:
    - source activate tensorflow_p36 && ray stop
    - ulimit -n 65536; source activate tensorflow_p36 && OMP_NUM_THREADS=1 ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076

View file

@ -0,0 +1,53 @@
# This file is generated by `ray project create`.

name: rllib_regression_tests

# description: A short description of the project.

# The URL of the repo this project is part of.
# repo: ...

cluster:
  config: ray-project/cluster.yaml
  params:
    - name: ray_version  # Ray version string.
      default: "0.8.2"
    - name: commit  # Ray commit SHA string.
      default: "f5a1307a608fe5fdbdb04616b22c91f029af329a"

environment:
  # dockerfile: The Dockerfile to build and run the commands in.
  # dockerimage: The docker image to run the project in, e.g. ubuntu:18.04.
  requirements: ray-project/requirements.txt
  shell:  # Shell commands to be run for environment setup.
    - echo "Setting up the environment"

commands:
  - name: check-load
    command: uptime
    help: "Check the load on the workload machine."
  - name: check-gpu
    command: nvidia-smi
    help: "Check the load on the GPU."
  - name: show-output
    command: tmux capture-pane -p
    help: "Show the tail of the workload output."
  - name: run-regression-tests
    command: source activate tensorflow_p36 && rllib train -f compact-regression-test.yaml
    help: "Run the RLlib regression tests."

# Pathnames for files and directories that should be saved
# in a snapshot but that should not be synced with a
# session. Pathnames can be relative to the project
# directory or absolute. Generally, these should be files
# that were created by an active session, such as
# application checkpoints and logs.
output_files: [
  # For example, uncomment this to save the logs from the
  # last ray job.
  # "/tmp/ray/session_latest",
]

View file

@ -0,0 +1 @@
ray[rllib]

View file

@ -0,0 +1,24 @@
# Taken from rllib/tuned_examples/atari_impala_large.yaml
# Runs on a g3.16xl node with 5 m5.24xl workers
# Takes roughly 10 minutes. x10?
atari-impala:
    env:
        grid_search:
            - BreakoutNoFrameskip-v4
            - BeamRiderNoFrameskip-v4
            - QbertNoFrameskip-v4
            - SpaceInvadersNoFrameskip-v4
    run: IMPALA
    stop:
        timesteps_total: 30000000
    config:
        sample_batch_size: 50
        train_batch_size: 500
        num_workers: 128
        num_envs_per_worker: 5
        clip_rewards: True
        lr_schedule: [
            [0, 0.0005],
            [20000000, 0.000000000001],
        ]

View file

@ -3,22 +3,15 @@
####################################################################
# A unique identifier for the head node and workers of this cluster.
cluster_name: <<<CLUSTER_NAME>>>
cluster_name: ray-rllib-stress-tests
# The minimum number of worker nodes to launch in addition to the head
# node. This number should be >= 0.
min_workers: <<<MIN_WORKERS>>>
min_workers: 9
# The maximum number of worker nodes to launch in addition to the head
# node. This takes precedence over min_workers.
max_workers: <<<MAX_WORKERS>>>
# This executes all commands on all nodes in the docker container,
# and opens all the necessary ports to support the Ray cluster.
# Empty string means disabled.
docker:
    image: ""  # e.g., tensorflow/tensorflow:1.5.0-py3
    container_name: ""  # e.g. ray_docker
max_workers: 9
# The autoscaler will scale up the cluster to this target fraction of resource
# usage. For example, if a cluster of 10 nodes is 100% busy and
@ -35,7 +28,7 @@ provider:
    type: aws
    region: us-west-2
    availability_zone: us-west-2a
    cache_stopped_nodes: false
    cache_stopped_nodes: False
# How Ray will authenticate with newly launched nodes.
auth:
@ -50,10 +43,10 @@ auth:
# For more documentation on available fields, see:
# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
head_node:
    InstanceType: <<<HEAD_TYPE>>>
    InstanceType: p3.16xlarge
    ImageId: ami-07728e9e2742b0662  # Deep Learning AMI (Ubuntu 16.04)
    # You can provision additional disk space with a conf as follows
    # Set primary volume to 25 GiB
    BlockDeviceMappings:
        - DeviceName: /dev/sda1
          Ebs:
@ -66,12 +59,19 @@ head_node:
# For more documentation on available fields, see:
# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
worker_nodes:
    InstanceType: <<<WORKER_TYPE>>>
    InstanceType: m4.16xlarge
    ImageId: ami-07728e9e2742b0662  # Deep Learning AMI (Ubuntu 16.04)
    # Set primary volume to 100 GiB
    BlockDeviceMappings:
        - DeviceName: /dev/sda1
          Ebs:
              VolumeSize: 100
    # Run workers on spot by default. Comment this out to use on-demand.
    # InstanceMarketOptions:
    #     MarketType: spot
    # InstanceMarketOptions:
    #     MarketType: spot
    # Additional options can be found in the boto docs, e.g.
    #     SpotOptions:
    #         MaxPrice: MAX_HOURLY_PRICE
@ -87,17 +87,13 @@ file_mounts: {
# List of shell commands to run to set up nodes.
setup_commands:
- wget --quiet https://s3-us-west-2.amazonaws.com/ray-wheels/releases/<<<RAY_VERSION>>>/<<<RAY_COMMIT>>>/ray-<<<RAY_VERSION>>>-<<<WHEEL_STR>>>-manylinux1_x86_64.whl
- source activate tensorflow_p36 && pip install -U ray-<<<RAY_VERSION>>>-<<<WHEEL_STR>>>-manylinux1_x86_64.whl
- wget --quiet https://s3-us-west-2.amazonaws.com/ray-wheels/releases/{{ray_version}}/{{commit}}/ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl
- source activate tensorflow_p36 && pip install -U ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl
- source activate tensorflow_p36 && pip install ray[rllib] ray[debug]
# Consider uncommenting these if you also want to run apt-get commands during setup
# - sudo pkill -9 apt-get || true
# - sudo pkill -9 dpkg || true
# - sudo dpkg --configure -a
- source activate tensorflow_p36 && pip install boto3==1.4.8 cython==0.29.0
# Custom commands that will be run on the head node after common setup.
head_setup_commands:
- pip install boto3==1.4.8 # 1.4.8 adds InstanceMarketOptions
head_setup_commands: []
# Custom commands that will be run on worker nodes after common setup.
worker_setup_commands: []

View file

@ -0,0 +1,49 @@
# This file is generated by `ray project create`.

name: rllib_stress_tests

# description: A short description of the project.

# The URL of the repo this project is part of.
# repo: ...

cluster:
  config: ray-project/cluster.yaml
  params:
    - name: ray_version  # Ray version string.
      default: "0.8.2"
    - name: commit  # Ray commit SHA string.
      default: "f5a1307a608fe5fdbdb04616b22c91f029af329a"

environment:
  # dockerfile: The Dockerfile to build and run the commands in.
  # dockerimage: The docker image to run the project in, e.g. ubuntu:18.04.
  requirements: ray-project/requirements.txt
  shell:  # Shell commands to be run for environment setup.
    - echo "Setting up the environment"

commands:
  - name: check-load
    command: uptime
    help: "Check the load on the workload machine."
  - name: show-output
    command: tmux capture-pane -p
    help: "Show the tail of the workload output."
  - name: run-impala
    command: bash run.sh
    help: "Run the IMPALA stress test."

# Pathnames for files and directories that should be saved
# in a snapshot but that should not be synced with a
# session. Pathnames can be relative to the project
# directory or absolute. Generally, these should be files
# that were created by an active session, such as
# application checkpoints and logs.
output_files: [
  # For example, uncomment this to save the logs from the
  # last ray job.
  # "/tmp/ray/session_latest",
]

View file

@ -0,0 +1 @@
ray[rllib]

View file

@ -0,0 +1,6 @@
source activate tensorflow_p36
python3 wait_cluster.py
rllib train -f atari_impala_xlarge.yaml --ray-address=auto --queue-trials

View file

@ -0,0 +1,10 @@
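# Block until the autoscaled cluster is ready. The stress-test cluster requests
# 9 workers (see cluster.yaml), so wait until more than 8 nodes have registered.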
import ray
import time

ray.init(address="auto")

curr_nodes = 0
while curr_nodes <= 8:
    print("Waiting for more nodes to come up: {}/{}".format(curr_nodes, 8))
    curr_nodes = len(ray.nodes())
    time.sleep(5)

View file

@ -98,7 +98,7 @@ setup_commands:
# - ray/ci/travis/install-bazel.sh
- pip install boto3==1.4.8 cython==0.29.0
# - cd ray/python; git checkout master; git pull; pip install -e . --verbose
- "pip install https://s3-us-west-2.amazonaws.com/ray-wheels/releases/{{ray_version}}/{{commit}}/ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl"
- "pip install https://s3-us-west-2.amazonaws.com/ray-wheels/{{ray_branch}}/{{commit}}/ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl"
# Custom commands that will be run on the head node after common setup.
head_setup_commands: []

View file

@ -6,11 +6,13 @@ cluster:
  config: ray-project/cluster.yaml
  params:
    - name: ray_version  # Ray version string.
      default: "0.8.1"
      default: "0.8.2"
    - name: commit  # Ray commit SHA string.
      default: "38ec2e70524a277d5aea307f6c843065ff982da5"
      default: "f5a1307a608fe5fdbdb04616b22c91f029af329a"
    - name: ray_branch
      default: "releases/0.8.2"
commands:
- name: test_many_tasks

View file

@ -1,4 +0,0 @@
*.log
*temporary.yaml
rllib_impala_p36.yaml
sgd_p36.yaml

View file

@ -1,158 +0,0 @@
#!/usr/bin/env bash
# This script should be run as follows:
# ./run_application_stress_tests.sh <ray-version> <ray-commit>
# For example, <ray-version> might be 0.7.1
# and <ray-commit> might be bc3b6efdb6933d410563ee70f690855c05f25483. The commit
# should be the latest commit on the branch "releases/<ray-version>".
# This script runs all of the application tests.
# Currently includes an IMPALA stress test and an SGD stress test on Python 3.6.
# All tests use a separate cluster, and each cluster
# will be destroyed upon test completion (or failure).
# Note that if the environment variable DEBUG_MODE is detected,
# the clusters will not be automatically shut down after the test runs.
# This script will exit with code 1 if the test did not run successfully.
# Show explicitly which commands are currently running. This should only be done AFTER
# the private key is placed.
set -x
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
RESULT_FILE=$ROOT_DIR/"results-$(date '+%Y-%m-%d_%H-%M-%S').log"
touch "$RESULT_FILE"
echo "Logging to" "$RESULT_FILE"
if [[ -z "$1" ]]; then
echo "ERROR: The first argument must be the Ray version string."
exit 1
else
RAY_VERSION=$1
fi
if [[ -z "$2" ]]; then
echo "ERROR: The second argument must be the commit hash to test."
exit 1
else
RAY_COMMIT=$2
fi
echo "Testing ray==$RAY_VERSION at commit $RAY_COMMIT."
echo "The wheels used will live under https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_COMMIT/"
# This function identifies the right string for the Ray wheel.
_find_wheel_str(){
local python_version=$1
# echo "PYTHON_VERSION", $python_version
local wheel_str=""
if [ "$python_version" == "p27" ]; then
wheel_str="cp27-cp27mu"
else
wheel_str="cp36-cp36m"
fi
echo $wheel_str
}
# Total time is roughly 25 minutes.
# Actual test runtime is roughly 10 minutes.
test_impala(){
local PYTHON_VERSION=$1
local WHEEL_STR=$(_find_wheel_str "$PYTHON_VERSION")
pushd "$ROOT_DIR"
local TEST_NAME="rllib_impala_$PYTHON_VERSION"
local CLUSTER="$TEST_NAME.yaml"
echo "Creating IMPALA cluster YAML from template."
cat application_cluster_template.yaml |
sed -e "
s/<<<RAY_VERSION>>>/$RAY_VERSION/g;
s/<<<RAY_COMMIT>>>/$RAY_COMMIT/;
s/<<<CLUSTER_NAME>>>/$TEST_NAME/;
s/<<<HEAD_TYPE>>>/p3.16xlarge/;
s/<<<WORKER_TYPE>>>/m4.16xlarge/;
s/<<<MIN_WORKERS>>>/9/;
s/<<<MAX_WORKERS>>>/9/;
s/<<<PYTHON_VERSION>>>/$PYTHON_VERSION/;
s/<<<WHEEL_STR>>>/$WHEEL_STR/;" > "$CLUSTER"
echo "Try running IMPALA stress test."
{
RLLIB_DIR=../../python/ray/rllib/
ray --logging-level=DEBUG up -y "$CLUSTER" &&
ray rsync_up "$CLUSTER" $RLLIB_DIR/tuned_examples/ tuned_examples/ &&
# HACK: the test will deadlock if it scales up slowly, so we have to wait
# for the cluster to be fully launched first. This is because the first
# trial will occupy all the CPU slots if it can, preventing GPU access.
sleep 200 &&
ray --logging-level=DEBUG exec "$CLUSTER" "source activate tensorflow_p36 && rllib train -f tuned_examples/atari-impala-large.yaml --ray-address='localhost:6379' --queue-trials" &&
echo "PASS: IMPALA Test for" "$PYTHON_VERSION" >> "$RESULT_FILE"
} || echo "FAIL: IMPALA Test for" "$PYTHON_VERSION" >> "$RESULT_FILE"
# Tear down cluster.
if [ "$DEBUG_MODE" = "" ]; then
ray down -y "$CLUSTER"
rm "$CLUSTER"
else
echo "Not tearing down cluster" "$CLUSTER"
fi
popd
}
# Total runtime is about 20 minutes (if the AWS spot instance order is fulfilled).
# Actual test runtime is roughly 10 minutes.
test_sgd(){
local PYTHON_VERSION=$1
local WHEEL_STR=$(_find_wheel_str $PYTHON_VERSION)
pushd "$ROOT_DIR"
local TEST_NAME="sgd_$PYTHON_VERSION"
local CLUSTER="$TEST_NAME.yaml"
echo "Creating SGD cluster YAML from template."
cat application_cluster_template.yaml |
sed -e "
s/<<<RAY_VERSION>>>/$RAY_VERSION/g;
s/<<<RAY_COMMIT>>>/$RAY_COMMIT/;
s/<<<CLUSTER_NAME>>>/$TEST_NAME/;
s/<<<HEAD_TYPE>>>/p3.16xlarge/;
s/<<<WORKER_TYPE>>>/p3.16xlarge/;
s/<<<MIN_WORKERS>>>/3/;
s/<<<MAX_WORKERS>>>/3/;
s/<<<PYTHON_VERSION>>>/$PYTHON_VERSION/;
s/<<<WHEEL_STR>>>/$WHEEL_STR/;" > "$CLUSTER"
echo "Try running SGD stress test."
{
SGD_DIR=$ROOT_DIR/../../python/ray/util/sgd/
ray --logging-level=DEBUG up -y "$CLUSTER" &&
# TODO: fix submit so that args work
ray rsync_up "$CLUSTER" "$SGD_DIR/mnist_example.py" mnist_example.py &&
sleep 1 &&
ray --logging-level=DEBUG exec "$CLUSTER" "
python mnist_example.py --address=localhost:6379 --num-iters=2000 --num-workers=8 --devices-per-worker=2 --gpu" &&
echo "PASS: SGD Test for" "$PYTHON_VERSION" >> "$RESULT_FILE"
} || echo "FAIL: SGD Test for" "$PYTHON_VERSION" >> "$RESULT_FILE"
# Tear down cluster.
if [ "$DEBUG_MODE" = "" ]; then
ray down -y "$CLUSTER"
rm "$CLUSTER"
else
echo "Not tearing down cluster" "$CLUSTER"
fi
popd
}
# RUN TESTS
for PYTHON_VERSION in "p36"
do
test_impala $PYTHON_VERSION
done
cat "$RESULT_FILE"
cat "$RESULT_FILE" | grep FAIL > test.log
[ ! -s test.log ] || exit 1

View file

@ -1,28 +0,0 @@
#!/usr/bin/env bash
# Cause the script to exit if a single command fails.
set -e
# Show explicitly which commands are currently running.
set -x
MEMORY_SIZE="20G"
SHM_SIZE="20G"
docker build -q --no-cache -t ray-project/base-deps docker/base-deps
# Add Ray source
git rev-parse HEAD > ./docker/stress_test/git-rev
git archive -o ./docker/stress_test/ray.tar $(git rev-parse HEAD)
DOCKER_SHA=$(docker build --no-cache -q -t ray-project/stress_test docker/stress_test)
echo "Using Docker image" $DOCKER_SHA
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 \
-e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e RAY_AWS_SSH_KEY \
$DOCKER_SHA \
bash /ray/ci/stress_tests/run_stress_tests.sh
# docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 \
# -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e RAY_AWS_SSH_KEY \
# $DOCKER_SHA \
# bash /ray/ci/stress_tests/run_application_stress_tests.sh

View file

@ -1,61 +0,0 @@
#!/usr/bin/env bash
# Show explicitly which commands are currently running.
set -x
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
RESULT_FILE=$ROOT_DIR/results-$(date '+%Y-%m-%d_%H-%M-%S').log
touch "$RESULT_FILE"
echo "Logging to" "$RESULT_FILE"
if [[ -z "$1" ]]; then
echo "ERROR: The first argument must be the Ray version string."
exit 1
else
RAY_VERSION=$1
fi
if [[ -z "$2" ]]; then
echo "ERROR: The second argument must be the commit hash to test."
exit 1
else
RAY_COMMIT=$2
fi
echo "Testing ray==$RAY_VERSION at commit $RAY_COMMIT."
echo "The wheels used will live under https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_COMMIT/"
run_test(){
local test_name=$1
local CLUSTER="stress_testing_config_temporary.yaml"
cat stress_testing_config.yaml |
sed -e "
s/<<<RAY_VERSION>>>/$RAY_VERSION/g;
s/<<<RAY_COMMIT>>>/$RAY_COMMIT/;" > "$CLUSTER"
echo "Try running $test_name."
{
ray up -y $CLUSTER --cluster-name "$test_name" &&
sleep 1 &&
ray --logging-level=DEBUG submit "$CLUSTER" --cluster-name "$test_name" "$test_name.py"
} || echo "FAIL: $test_name" >> "$RESULT_FILE"
# Tear down cluster.
if [ "$DEBUG_MODE" = "" ]; then
ray down -y $CLUSTER --cluster-name "$test_name"
rm "$CLUSTER"
else
echo "Not tearing down cluster" "$CLUSTER"
fi
}
pushd "$ROOT_DIR"
run_test test_many_tasks
run_test test_dead_actors
popd
cat "$RESULT_FILE"
[ ! -s "$RESULT_FILE" ] || exit 1

View file

@ -1,117 +0,0 @@
####################################################################
# All nodes in this cluster will auto-terminate in 1 hour
####################################################################
# A unique identifier for the head node and workers of this cluster.
cluster_name: stress-testing
# The minimum number of worker nodes to launch in addition to the head
# node. This number should be >= 0.
min_workers: 105
# The maximum number of worker nodes to launch in addition to the head
# node. This takes precedence over min_workers.
max_workers: 105
# The autoscaler will scale up the cluster to this target fraction of resource
# usage. For example, if a cluster of 10 nodes is 100% busy and
# target_utilization is 0.8, it would resize the cluster to 13. This fraction
# can be decreased to increase the aggressiveness of upscaling.
# This value must be less than 1.0 for scaling to happen.
target_utilization_fraction: 0.8
# If a node is idle for this many minutes, it will be removed.
idle_timeout_minutes: 5
# Cloud-provider specific configuration.
provider:
    type: aws
    region: us-west-2
    availability_zone: us-west-2a
    cache_stopped_nodes: False
# How Ray will authenticate with newly launched nodes.
auth:
    ssh_user: ubuntu
# By default Ray creates a new private keypair, but you can also use your own.
# If you do so, make sure to also set "KeyName" in the head and worker node
# configurations below.
# ssh_private_key: /path/to/your/key.pem
# Provider-specific config for the head node, e.g. instance type. By default
# Ray will auto-configure unspecified fields such as SubnetId and KeyName.
# For more documentation on available fields, see:
# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
head_node:
    InstanceType: m4.16xlarge
    ImageId: ami-06d51e91cea0dac8d  # Ubuntu 18.04
    # Set primary volume to 100 GiB
    BlockDeviceMappings:
        - DeviceName: /dev/sda1
          Ebs:
              VolumeSize: 100
    # Additional options in the boto docs.
# Provider-specific config for worker nodes, e.g. instance type. By default
# Ray will auto-configure unspecified fields such as SubnetId and KeyName.
# For more documentation on available fields, see:
# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
worker_nodes:
    InstanceType: m4.large
    ImageId: ami-06d51e91cea0dac8d  # Ubuntu 18.04
    # Set primary volume to 100 GiB
    BlockDeviceMappings:
        - DeviceName: /dev/sda1
          Ebs:
              VolumeSize: 100
    # Run workers on spot by default. Comment this out to use on-demand.
    InstanceMarketOptions:
        MarketType: spot
        # Additional options can be found in the boto docs, e.g.
        #     SpotOptions:
        #         MaxPrice: MAX_HOURLY_PRICE
    # Additional options in the boto docs.
# Files or directories to copy to the head and worker nodes. The format is a
# dictionary from REMOTE_PATH: LOCAL_PATH, e.g.
file_mounts: {
# "/path1/on/remote/machine": "/path1/on/local/machine",
# "/path2/on/remote/machine": "/path2/on/local/machine",
}
# List of shell commands to run to set up nodes.
setup_commands:
# Uncomment these if you want to build ray from source.
# - sudo apt-get -qq update
# - sudo apt-get install -y build-essential curl unzip
# Install Anaconda.
- wget --quiet https://repo.continuum.io/archive/Anaconda3-5.0.1-Linux-x86_64.sh || true
- bash Anaconda3-5.0.1-Linux-x86_64.sh -b -p $HOME/anaconda3 || true
- echo 'export PATH="$HOME/anaconda3/bin:$PATH"' >> ~/.bashrc
# # Build Ray.
# - git clone https://github.com/ray-project/ray || true
# - ray/ci/travis/install-bazel.sh
- pip install boto3==1.4.8 cython==0.29.0
# - cd ray/python; git checkout master; git pull; pip install -e . --verbose
- pip install https://s3-us-west-2.amazonaws.com/ray-wheels/releases/<<<RAY_VERSION>>>/<<<RAY_COMMIT>>>/ray-<<<RAY_VERSION>>>-cp36-cp36m-manylinux1_x86_64.whl
# Custom commands that will be run on the head node after common setup.
head_setup_commands: []
# Custom commands that will be run on worker nodes after common setup.
worker_setup_commands: []
# Command to start ray on the head node. You don't need to change this.
head_start_ray_commands:
- ray stop
- ulimit -n 65536; ray start --head --num-redis-shards=5 --redis-port=6379 --autoscaling-config=~/ray_bootstrap_config.yaml
# Command to start ray on worker nodes. You don't need to change this.
worker_start_ray_commands:
- ray stop
- ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --num-gpus=100

View file

@ -24,9 +24,10 @@ This document describes the process for creating new releases.
For a new micro release (e.g., 0.7.1): No action is required.
4. **Testing:** Before releasing, the following sets of tests should be run. The results
of each of these tests for previous releases are checked in under ``doc/dev/release_tests``,
and should be compared against to identify any regressions.
4. **Testing:** Before releasing, the following sets of tests should be run.
   The results of each of these tests for previous releases are checked in
   under ``doc/dev/release_logs``, and the new results should be compared
   against them to identify any regressions.
1. Long-running tests
@ -38,16 +39,16 @@ This document describes the process for creating new releases.
These tests should run for at least 24 hours (printing new iterations, with the
CPU load remaining stable in the AWS console).
The last hundred lines or so printed by each test should be checked in under
``doc/dev/release_logs/<version>``.
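For example, the tail of each workload's output can be captured with the
project's ``show-output`` command (the log filename here is illustrative):

.. code-block:: bash

    anyscale session --name="*" execute show-output | tail -n 100 > doc/dev/release_logs/<version>/long_running_tests.txt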
2. Multi-node regression tests
2. Stress tests
Follow the same instructions as for the long-running tests. These large-scale
distributed regression tests identify potential performance regressions in a
distributed environment. The following tests should be run:
.. code-block:: bash
    ray/ci/stress_tests/run_stress_tests.sh <release-version> <release-commit>
    ray/ci/stress_tests/run_application_stress_tests.sh <release-version> <release-commit>
    rllib train -f rllib/tuned_examples/compact-regression-test.yaml
- ``ci/regression_test/rllib_regression-tests`` runs the compact regression test for RLlib.
- ``ci/regression_test/rllib_stress_tests`` runs a multi-node, 8-hour IMPALA trial.
- ``ci/regression_test/stress_tests`` contains two tests: ``many_tasks`` and ``dead_actors``.
  Each test runs on 105 spot instances.
Make sure that these pass. For the RLlib regression tests, see the comment at the
top of the file for the pass criteria. For the rest, it will be obvious if they passed.
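Each of these directories is an Anyscale project, so a run can be kicked off with
the command defined in its ``project.yaml``; sketched here for the RLlib regression
tests, assuming the same CLI pattern as the other release projects:

.. code-block:: bash

    cd ci/regression_test/rllib_regression-tests
    anyscale session start -y run-regression-tests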
@ -59,12 +60,9 @@ This document describes the process for creating new releases.
3. Microbenchmarks
.. code-block:: bash
    ray microbenchmark
Run `ray microbenchmark` on an `m4.16xl` instance running `Ubuntu 18.04` with `Python 3` to get the
latest microbenchmark numbers.
Run the ``ci/microbenchmark`` project with the release commit. Under the hood, the
session will run `ray microbenchmark` on an `m4.16xl` instance running `Ubuntu 18.04`
with `Python 3` to get the latest microbenchmark numbers.
The results should be checked in under ``doc/dev/release_logs/<version>``.
@ -82,7 +80,8 @@ This document describes the process for creating new releases.
changes/updates/bugfixes and their PR numbers. Once you have a draft, send it
out to other Ray developers (especially those who contributed heavily during
this release) for feedback. At the end of the release note, you should also
add a list of contributors.
add a list of contributors. Make sure Ray, Tune, RLlib, and Autoscaler are
capitalized correctly.
Run ``doc/dev/get_contributors.py`` to generate the list of commits corresponding
to this release and the formatted list of contributors.
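A minimal sketch of this step (the script queries the GitHub commit history, so
have an access token handy; check its ``--help`` for the exact flags):

.. code-block:: bash

    python doc/dev/get_contributors.py --help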
@ -107,14 +106,16 @@ This document describes the process for creating new releases.
export RAY_HASH=... # e.g., 618147f57fb40368448da3b2fb4fd213828fa12b
export RAY_VERSION=... # e.g., 0.7.0
pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp27-cp27mu-manylinux1_x86_64.whl
# Linux Wheels
pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp35-cp35m-manylinux1_x86_64.whl
pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp36-cp36m-manylinux1_x86_64.whl
pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp37-cp37m-manylinux1_x86_64.whl
pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp27-cp27m-macosx_10_6_intel.whl
pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp35-cp35m-macosx_10_6_intel.whl
pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp36-cp36m-macosx_10_6_intel.whl
pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp37-cp37m-macosx_10_6_intel.whl
# Mac Wheels
pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp35-cp35m-macosx_10_13_intel.whl
pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp36-cp36m-macosx_10_13_intel.whl
pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp37-cp37m-macosx_10_13_intel.whl
8. **Upload to PyPI Test:** Upload the wheels to the PyPI test site using
``twine``.
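A minimal sketch of this step, assuming the wheels downloaded in the previous
step sit in the current directory:

.. code-block:: bash

    pip install twine
    twine upload --repository-url https://test.pypi.org/legacy/ ray-$RAY_VERSION-*.whl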
@ -164,9 +165,14 @@ This document describes the process for creating new releases.
pip install -U ray
10. **Improve the release process:** Find some way to improve the release
10. **Create a point release on the readthedocs page:** In the `read the docs project page`_,
    mark the release branch as "active" so there is a point release for the documentation.
    Ask @richardliaw to add you if you don't have access.
11. **Improve the release process:** Find some way to improve the release
process so that whoever manages the release next will have an easier time.
.. _`sample PR for bumping a minor release version`: https://github.com/ray-project/ray/pull/6303
.. _`sample commit for bumping the release branch version`: https://github.com/ray-project/ray/commit/a39325d818339970e51677708d5596f4b8f790ce
.. _`GitHub release`: https://github.com/ray-project/ray/releases
.. _`read the docs project page`: https://readthedocs.org/projects/ray/

View file

@ -1,6 +1,6 @@
wget https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp35-cp35m-manylinux1_x86_64.whl
wget https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp36-cp36m-manylinux1_x86_64.whl
wget https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp37-cp37m-manylinux1_x86_64.whl
wget https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp35-cp35m-macosx_10_6_intel.whl
wget https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp36-cp36m-macosx_10_6_intel.whl
wget https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp37-cp37m-macosx_10_6_intel.whl
wget https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp35-cp35m-macosx_10_13_intel.whl
wget https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp36-cp36m-macosx_10_13_intel.whl
wget https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp37-cp37m-macosx_10_13_intel.whl

View file

@ -0,0 +1,18 @@
# NOTE: Make sure to run this with OMP_NUM_THREADS=64, otherwise the put gigabytes
# per second will be reduced. Put latency regressed due to an extra IPC call to the
# raylet for ref counting.
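# (The numbers below were collected with: OMP_NUM_THREADS=64 ray microbenchmark.)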
single client get calls per second 11743.14 +- 2062.85
single client put calls per second 3133.08 +- 89.81
single client put gigabytes per second 10.33 +- 7.96
multi client put calls per second 3590.16 +- 22.04
multi client put gigabytes per second 23.38 +- 0.63
single client tasks sync per second 1263.59 +- 63.16
single client tasks async per second 13959.14 +- 393.16
multi client tasks async per second 42285.81 +- 238.55
1:1 actor calls sync per second 2159.21 +- 112.97
1:1 actor calls async per second 7048.53 +- 63.8
1:1 actor calls concurrent per second 6167.01 +- 75.67
1:n actor calls async per second 12241.67 +- 62.13
n:n actor calls async per second 41766.33 +- 672.14
n:n actor calls with arg async per second 13134.22 +- 71.68

View file

@ -0,0 +1,36 @@
== Status ==
Memory usage on this node: 43.4/480.3 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/64 CPUs, 0.0/8 GPUs, 0.0/440.23 GiB heap, 0.0/12.84 GiB objects
Result logdir: /home/ubuntu/ray_results/apex
Result logdir: /home/ubuntu/ray_results/atari-a2c
Result logdir: /home/ubuntu/ray_results/atari-basic-dqn
Result logdir: /home/ubuntu/ray_results/atari-impala
Result logdir: /home/ubuntu/ray_results/atari-ppo-tf
Result logdir: /home/ubuntu/ray_results/atari-ppo-torch
Number of trials: 24 (24 TERMINATED)
Table truncated to 20 rows. 4 trials (4 TERMINATED) not shown.
+--------------------------------------+------------+-------+----------+------------------+---------+--------+
| Trial name | status | loc | reward | total time (s) | ts | iter |
|--------------------------------------+------------+-------+----------+------------------+---------+--------|
| A2C_BreakoutNoFrameskip-v4_c8ad5a48 | TERMINATED | | 139.19 | 3606.77 | 3686000 | 352 |
| A2C_BreakoutNoFrameskip-v4_c8ad1c54 | TERMINATED | | 75.56 | 3601.57 | 2932000 | 349 |
| A2C_BreakoutNoFrameskip-v4_c8acd28a | TERMINATED | | 131.97 | 3603.39 | 2928000 | 349 |
| A2C_BreakoutNoFrameskip-v4_c8ac8d16 | TERMINATED | | 105.42 | 3601.03 | 2901500 | 349 |
| DQN_BreakoutNoFrameskip-v4_c8af8a02 | TERMINATED | | 15.81 | 3665.65 | 270000 | 27 |
| DQN_BreakoutNoFrameskip-v4_c8af079e | TERMINATED | | 11.32 | 3612.1 | 270000 | 27 |
| APEX_BreakoutNoFrameskip-v4_c8ac4694 | TERMINATED | | 50.56 | 3627.89 | 5786880 | 115 |
| DQN_BreakoutNoFrameskip-v4_c8ae61ae | TERMINATED | | 7.14 | 3620.61 | 270000 | 27 |
| DQN_BreakoutNoFrameskip-v4_c8adbcea | TERMINATED | | 11.24 | 3640.35 | 270000 | 27 |
| APEX_BreakoutNoFrameskip-v4_c8abef3c | TERMINATED | | 94.5 | 3625.19 | 5820800 | 115 |
| PPO_BreakoutNoFrameskip-v4_c8ab0572 | TERMINATED | | 25.26 | 3603.23 | 1335000 | 267 |
| PPO_BreakoutNoFrameskip-v4_c8aabf36 | TERMINATED | | 18.2 | 3603.36 | 1300000 | 260 |
| APEX_BreakoutNoFrameskip-v4_c8abaa86 | TERMINATED | | 90.98 | 3627.03 | 7350400 | 116 |
| PPO_BreakoutNoFrameskip-v4_c8aa6f5e | TERMINATED | | 17.01 | 3611.01 | 1555000 | 311 |
| PPO_BreakoutNoFrameskip-v4_c8aa27e2 | TERMINATED | | 22.41 | 3609.64 | 1545000 | 309 |
| PPO_BreakoutNoFrameskip-v4_c8a9e39a | TERMINATED | | 61.25 | 3602.17 | 4475000 | 895 |
| PPO_BreakoutNoFrameskip-v4_c8a97978 | TERMINATED | | 28.19 | 3601.33 | 4415000 | 883 |
| PPO_BreakoutNoFrameskip-v4_c8a904ca | TERMINATED | | 41.3 | 3600.42 | 4515000 | 903 |
| APEX_BreakoutNoFrameskip-v4_c8ab5108 | TERMINATED | | 62.46 | 3626.37 | 5091840 | 114 |
| PPO_BreakoutNoFrameskip-v4_c8a88004 | TERMINATED | | 60.44 | 3602.52 | 3380000 | 676 |
+--------------------------------------+------------+-------+----------+------------------+---------+--------+

View file

@ -0,0 +1,14 @@
== Status ==
Memory usage on this node: 34.6/480.3 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/640 CPUs, 0/8 GPUs, 0.0/2541.21 GiB heap, 0.0/128.42 GiB objects
Result logdir: /home/ubuntu/ray_results/atari-impala
Number of trials: 4 (4 TERMINATED)
+---------------------------------------------+------------+-------+-----------------------------+----------+------------------+----------+--------+
| Trial name | status | loc | env | reward | total time (s) | ts | iter |
|---------------------------------------------+------------+-------+-----------------------------+----------+------------------+----------+--------|
| IMPALA_BreakoutNoFrameskip-v4_2565545c | TERMINATED | | BreakoutNoFrameskip-v4 | 451.07 | 22555.3 | 30039500 | 381 |
| IMPALA_BeamRiderNoFrameskip-v4_2565e804 | TERMINATED | | BeamRiderNoFrameskip-v4 | 3124.8 | 24121.2 | 30057000 | 408 |
| IMPALA_QbertNoFrameskip-v4_256671de | TERMINATED | | QbertNoFrameskip-v4 | 8388.25 | 25163.5 | 30080000 | 453 |
| IMPALA_SpaceInvadersNoFrameskip-v4_256725ac | TERMINATED | | SpaceInvadersNoFrameskip-v4 | 780.65 | 23148.1 | 30026500 | 384 |
+---------------------------------------------+------------+-------+-----------------------------+----------+------------------+----------+--------+

View file

@ -0,0 +1,4 @@
Finished in: 98.49777579307556s
Average iteration time: 0.9849753308296204s
Max iteration time: 2.9459526538848877s
Min iteration time: 0.08075928688049316s

View file

@ -0,0 +1,15 @@
Stage 0 results:
Total time: 22.579216480255127
Stage 1 results:
Total time: 154.41431832313538
Average iteration time: 15.441423058509827
Max iteration time: 15.943994760513306
Min iteration time: 15.029884099960327
Stage 2 results:
Total time: 646.7662391662598
Average iteration time: 129.35279755592347
Max iteration time: 134.80017256736755
Min iteration time: 121.44297170639038
Stage 3 results:
Actor creation time: 0.0635519027709961
Total time: 3464.0461547374725