[rllib][asv] Support ASV for RLlib (#2304)
parent 92ab7e56ec
commit 3cc27d2840

5 changed files with 283 additions and 3 deletions
@@ -8,22 +8,31 @@ You can run the benchmark suite by doing the following:
To run ASV inside Docker, you can use the following commands:

``docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA bash -c '/ray/test/jenkins_tests/run_asv.sh'``

``docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA bash -c '/ray/test/jenkins_tests/run_rllib_asv.sh'``


Visualizing Benchmarks
======================

For visualizing regular Ray benchmarks, you must copy the S3 bucket down to ``$RAY_DIR/python``. Assuming ``asv`` is installed:

.. code-block:: bash

    cd $RAY_DIR/python
    aws s3 sync s3://$BUCKET/ASV/ .

For RLlib, you must sync a *particular* folder down to ``$RLLIB_DIR`` (``ray/python/ray/rllib``):

.. code-block:: bash

    cd $RAY_DIR/python/ray/rllib
    aws s3 sync s3://$BUCKET/RLLIB_RESULTS/ ./RLLIB_RESULTS

Then, in that directory, you can run:

.. code-block:: bash

    asv publish --no-pull
    asv preview

``asv publish`` builds the HTML report, and ``asv preview`` launches a local server at which you can visualize the results.
141 python/ray/rllib/asv.conf.json Normal file

@@ -0,0 +1,141 @@
{
    // The version of the config file format. Do not change, unless
    // you know what you are doing.
    "version": 1,

    // The name of the project being benchmarked
    "project": "rllib",

    // The project's homepage
    "project_url": "http://rllib.io",

    // The URL or local path of the source code repository for the
    // project being benchmarked
    "repo": "../../../",

    // List of branches to benchmark. If not provided, defaults to "master"
    // (for git) or "default" (for mercurial).
    "branches": ["master"], // for git
    // "branches": ["default"], // for mercurial

    // The DVCS being used. If not set, it will be automatically
    // determined from "repo" by looking at the protocol in the URL
    // (if remote), or by looking for special directories, such as
    // ".git" (if local).
    "dvcs": "git",

    // The tool to use to create environments. May be "conda",
    // "virtualenv" or other value depending on the plugins in use.
    // If missing or the empty string, the tool will be automatically
    // determined by looking for tools on the PATH environment
    // variable.
    "environment_type": "conda",

    // timeout in seconds for installing any dependencies in environment
    // defaults to 10 min
    //"install_timeout": 600,

    // the base URL to show a commit for the project.
    "show_commit_url": "http://github.com/ray-project/ray/commit/",

    // The Pythons you'd like to test against. If not provided, defaults
    // to the current version of Python used to run `asv`.
    "pythons": ["3.6"],

    // The matrix of dependencies to test. Each key is the name of a
    // package (in PyPI) and the values are version numbers. An empty
    // list or empty string indicates to just test against the default
    // (latest) version. null indicates that the package is to not be
    // installed. If the package to be tested is only available from
    // PyPi, and the 'environment_type' is conda, then you can preface
    // the package name by 'pip+', and the package will be installed via
    // pip (with all the conda available packages installed first,
    // followed by the pip installed packages).
    //
    // "matrix": {
    //     "numpy": ["1.6", "1.7"],
    //     "six": ["", null], // test with and without six installed
    //     "pip+emcee": [""], // emcee is only available for install with pip.
    // },

    // Combinations of libraries/python versions can be excluded/included
    // from the set to test. Each entry is a dictionary containing additional
    // key-value pairs to include/exclude.
    //
    // An exclude entry excludes entries where all values match. The
    // values are regexps that should match the whole string.
    //
    // An include entry adds an environment. Only the packages listed
    // are installed. The 'python' key is required. The exclude rules
    // do not apply to includes.
    //
    // In addition to package names, the following keys are available:
    //
    // - python
    //     Python version, as in the *pythons* variable above.
    // - environment_type
    //     Environment type, as above.
    // - sys_platform
    //     Platform, as in sys.platform. Possible values for the common
    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
    //
    // "exclude": [
    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
    //     {"environment_type": "conda", "six": null}, // don't run without six on conda
    // ],
    //
    // "include": [
    //     // additional env for python2.7
    //     {"python": "2.7", "numpy": "1.8"},
    //     // additional env if run on windows+conda
    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
    // ],

    // The directory (relative to the current directory) that benchmarks are
    // stored in. If not provided, defaults to "benchmarks"
    "benchmark_dir": "tuned_examples/regression_tests",

    // The directory (relative to the current directory) to cache the Python
    // environments in. If not provided, defaults to "env"
    // "env_dir": "env",

    // The directory (relative to the current directory) that raw benchmark
    // results are stored in. If not provided, defaults to "results".
    "results_dir": "RLLIB_RESULTS",

    // The directory (relative to the current directory) that the html tree
    // should be written to. If not provided, defaults to "html".
    // "html_dir": "html",

    // The number of characters to retain in the commit hashes.
    // "hash_length": 8,

    // `asv` will cache wheels of the recent builds in each
    // environment, making them faster to install next time. This is
    // number of builds to keep, per environment.
    // "wheel_cache_size": 0

    // The commits after which the regression search in `asv publish`
    // should start looking for regressions. Dictionary whose keys are
    // regexps matching to benchmark names, and values corresponding to
    // the commit (exclusive) after which to start looking for
    // regressions. The default is to start from the first commit
    // with results. If the commit is `null`, regression detection is
    // skipped for the matching benchmark.
    //
    // "regressions_first_commits": {
    //     "some_benchmark": "352cdf", // Consider regressions only after this commit
    //     "another_benchmark": null, // Skip regression detection altogether
    // }

    // The thresholds for relative change in results, after which `asv
    // publish` starts reporting regressions. Dictionary of the same
    // form as in ``regressions_first_commits``, with values
    // indicating the thresholds. If multiple entries match, the
    // maximum is taken. If no entry matches, the default is 5%.
    //
    // "regressions_thresholds": {
    //     "some_benchmark": 0.01, // Threshold of 1%
    //     "another_benchmark": 0.5, // Threshold of 50%
    // }
}
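The RLlib-specific settings above are ``benchmark_dir`` and ``results_dir``: ASV scans ``tuned_examples/regression_tests`` for benchmark classes whose methods carry prefixes such as ``track_`` or ``time_``, and writes raw results to ``RLLIB_RESULTS``. As a minimal sketch of the kind of class ASV discovers there (hypothetical, not part of this commit; the real RLlib benchmarks in the next file follow the same pattern):

.. code-block:: python

    # Hypothetical minimal ASV benchmark of the kind discovered in
    # "benchmark_dir". setup() runs before the benchmark; a track_*
    # method returns a number that ASV records for the current commit.
    class TrackExample(object):
        def setup(self):
            self.values = list(range(1000))

        def track_sum(self):
            return float(sum(self.values))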

@@ -0,0 +1,102 @@
#!/usr/bin/env python
"""
This module runs the regression YAMLs in the ASV format.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import defaultdict
import numpy as np
import os
import yaml

import ray
from ray import tune


CONFIG_DIR = os.path.dirname(os.path.abspath(__file__))


def _evaluate_config(filename):
    with open(os.path.join(CONFIG_DIR, filename)) as f:
        experiments = yaml.load(f)
    ray.init()
    trials = tune.run_experiments(experiments)
    results = defaultdict(list)
    for t in trials:
        results["time_total_s"] += [t.last_result.time_total_s]
        results["episode_reward_mean"] += [t.last_result.episode_reward_mean]
        results["training_iteration"] += [t.last_result.training_iteration]

    # Aggregate across trials by taking the median of each metric.
    return {k: np.median(v) for k, v in results.items()}
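
# For illustration (hypothetical numbers, not part of the commit): with two
# trials whose last_result.time_total_s values were 61.2 and 58.8, the dict
# returned by _evaluate_config would contain {"time_total_s": 60.0, ...},
# i.e. the per-metric median across trials.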


class Regression(object):
    def setup_cache(self):
        # We need to implement this in separate classes
        # below so that ASV will register the setup/class
        # as a separate test.
        raise NotImplementedError

    def teardown(self, *args):
        ray.worker.cleanup()

    def track_time(self, result):
        return result["time_total_s"]

    def track_reward(self, result):
        return result["episode_reward_mean"]

    def track_iterations(self, result):
        return result["training_iteration"]


class TestCartPolePPO(Regression):
    _file = "cartpole-ppo.yaml"

    def setup_cache(self):
        return _evaluate_config(self._file)


class TestCartPolePG(Regression):
    _file = "cartpole-pg.yaml"

    def setup_cache(self):
        return _evaluate_config(self._file)


class TestPendulumDDPG(Regression):
    _file = "pendulum-ddpg.yaml"

    def setup_cache(self):
        return _evaluate_config(self._file)


class TestCartPoleES(Regression):
    _file = "cartpole-es.yaml"

    def setup_cache(self):
        return _evaluate_config(self._file)


class TestCartPoleDQN(Regression):
    _file = "cartpole-dqn.yaml"

    def setup_cache(self):
        return _evaluate_config(self._file)


class TestCartPoleA3C(Regression):
    _file = "cartpole-a3c.yaml"

    def setup_cache(self):
        return _evaluate_config(self._file)


class TestCartPoleA3CPyTorch(Regression):
    _file = "cartpole-a3c-pytorch.yaml"

    def setup_cache(self):
        return _evaluate_config(self._file)
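Each subclass exists only so that ASV treats every YAML as a separate benchmark: ``setup_cache`` runs the tuned example once, and its return value is passed to each inherited ``track_*`` method. Adding a new regression test is therefore a three-line class; a hypothetical sketch (``cartpole-appo.yaml`` is an assumed filename, not part of this commit):

.. code-block:: python

    # Hypothetical: registers another tuned-example YAML as an ASV benchmark.
    # ASV calls setup_cache once and feeds its result dict to the inherited
    # track_time/track_reward/track_iterations methods.
    class TestCartPoleAPPO(Regression):
        _file = "cartpole-appo.yaml"  # assumed filename

        def setup_cache(self):
            return _evaluate_config(self._file)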
28 test/jenkins_tests/run_rllib_asv.sh Executable file

@@ -0,0 +1,28 @@
#!/usr/bin/env bash

# Cause the script to exit if a single command fails.
set -e

# Show explicitly which commands are currently running.
set -x

BUCKET_NAME=ray-integration-testing/ASV
COMMIT=$(cat /ray/git-rev)
RLLIB_RESULTS=RLLIB_RESULTS
RLLIB_RESULTS_DIR=/ray/python/ray/rllib/RLLIB_RESULTS
pip install awscli

# Install the Ray fork of ASV.
git clone https://github.com/ray-project/asv.git /tmp/asv/ || true
cd /tmp/asv/
pip install -e .

cd /ray/python/ray/rllib/
asv machine --machine jenkins
mkdir $RLLIB_RESULTS_DIR || true
# Pull the benchmark metadata from previous runs, if any.
aws s3 cp s3://$BUCKET_NAME/RLLIB_RESULTS/benchmarks.json $RLLIB_RESULTS_DIR/benchmarks.json || true

# Record results against the commit in /ray/git-rev; the
# --force-record-commit flag comes from the Ray fork of ASV installed above.
asv run --show-stderr --python=same --force-record-commit=$COMMIT

# Push the new results back to S3, keeping a per-commit copy of
# benchmarks.json alongside the synced results directory.
aws s3 cp $RLLIB_RESULTS_DIR/benchmarks.json s3://$BUCKET_NAME/RLLIB_RESULTS/benchmarks_$COMMIT.json
aws s3 sync $RLLIB_RESULTS_DIR/ s3://$BUCKET_NAME/RLLIB_RESULTS/