[rllib][asv] Support ASV for RLlib (#2304)

2025-03-06 02:21:39 -05:00 · 2018-06-28 17:20:09 -07:00 · 2018-06-28 17:20:09 -07:00 · 3cc27d2840
commit 3cc27d2840
parent 92ab7e56ec
5 changed files with 283 additions and 3 deletions
--- a/python/README-benchmarks.rst
+++ b/python/README-benchmarks.rst
@ -8,22 +8,31 @@ You can run the benchmark suite by doing the following:
 To run ASV inside docker, you can use the following command:
 ``docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA bash -c '/ray/test/jenkins_tests/run_asv.sh'``
 ``docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA bash -c '/ray/test/jenkins_tests/run_rllib_asv.sh'``
 Visualizing Benchmarks
 ======================
-To visualize benchmarks, you must copy the S3 bucket down to `$RAY_DIR/python`. Assuming asv is installed,
+For visualizing regular Ray benchmarks, you must copy the S3 bucket down to `$RAY_DIR/python`.
 .. code-block::
  cd $RAY_DIR/python
  aws s3 sync s3://$BUCKET/ASV/ .
-Then, you can run:
+For RLlib, you must sync a *particular* folder down to ``$RLLIB_DIR`` (``ray/python/ray/rllib``).
 .. code-block::
  cd $RAY_DIR/python/ray/rllib
  aws s3 sync s3://$BUCKET/RLLIB_RESULTS/ ./RLLIB_RESULTS
 Then, from the directory you changed into above (``$RAY_DIR/python`` or ``$RAY_DIR/python/ray/rllib``), you can run:
 .. code-block::
  asv publish --no-pull
  asv preview
-This creates the directory and then launches a server.
+``asv publish`` generates the HTML output directory, and ``asv preview`` then launches a local web server at which you can visualize the results.
--- a/python/ray/rllib/asv.conf.json
+++ b/python/ray/rllib/asv.conf.json
@ -0,0 +1,141 @@
 {
    // The version of the config file format.  Do not change, unless
    // you know what you are doing.
    "version": 1,
    // The name of the project being benchmarked
    "project": "rllib",
    // The project's homepage
    "project_url": "http://rllib.io",
    // The URL or local path of the source code repository for the
    // project being benchmarked
    "repo": "../../../",
    // List of branches to benchmark. If not provided, defaults to "master"
    // (for git) or "default" (for mercurial).
    "branches": ["master"], // for git
    // "branches": ["default"],    // for mercurial
    // The DVCS being used.  If not set, it will be automatically
    // determined from "repo" by looking at the protocol in the URL
    // (if remote), or by looking for special directories, such as
    // ".git" (if local).
    "dvcs": "git",
    // The tool to use to create environments.  May be "conda",
    // "virtualenv" or other value depending on the plugins in use.
    // If missing or the empty string, the tool will be automatically
    // determined by looking for tools on the PATH environment
    // variable.
    "environment_type": "conda",
    // Timeout in seconds for installing any dependencies in the environment.
    // Defaults to 10 minutes (600 seconds).
    //"install_timeout": 600,
    // the base URL to show a commit for the project.
    "show_commit_url": "http://github.com/ray-project/ray/commit/",
    // The Pythons you'd like to test against.  If not provided, defaults
    // to the current version of Python used to run `asv`.
    "pythons": ["3.6"],
    // The matrix of dependencies to test.  Each key is the name of a
    // package (in PyPI) and the values are version numbers.  An empty
    // list or empty string indicates to just test against the default
    // (latest) version. null indicates that the package is to not be
    // installed. If the package to be tested is only available from
    // PyPi, and the 'environment_type' is conda, then you can preface
    // the package name by 'pip+', and the package will be installed via
    // pip (with all the conda available packages installed first,
    // followed by the pip installed packages).
    //
    // "matrix": {
    //     "numpy": ["1.6", "1.7"],
    //     "six": ["", null],        // test with and without six installed
    //     "pip+emcee": [""],   // emcee is only available for install with pip.
    // },
    // Combinations of libraries/python versions can be excluded/included
    // from the set to test. Each entry is a dictionary containing additional
    // key-value pairs to include/exclude.
    //
    // An exclude entry excludes entries where all values match. The
    // values are regexps that should match the whole string.
    //
    // An include entry adds an environment. Only the packages listed
    // are installed. The 'python' key is required. The exclude rules
    // do not apply to includes.
    //
    // In addition to package names, the following keys are available:
    //
    // - python
    //     Python version, as in the *pythons* variable above.
    // - environment_type
    //     Environment type, as above.
    // - sys_platform
    //     Platform, as in sys.platform. Possible values for the common
    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
    //
    // "exclude": [
    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
    //     {"environment_type": "conda", "six": null}, // don't run without six on conda
    // ],
    //
    // "include": [
    //     // additional env for python2.7
    //     {"python": "2.7", "numpy": "1.8"},
    //     // additional env if run on windows+conda
    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
    // ],
    // The directory (relative to the current directory) that benchmarks are
    // stored in.  If not provided, defaults to "benchmarks"
    "benchmark_dir": "tuned_examples/regression_tests",
    // The directory (relative to the current directory) to cache the Python
    // environments in.  If not provided, defaults to "env"
    // "env_dir": "env",
    // The directory (relative to the current directory) that raw benchmark
    // results are stored in.  If not provided, defaults to "results".
    "results_dir": "RLLIB_RESULTS",
    // The directory (relative to the current directory) that the html tree
    // should be written to.  If not provided, defaults to "html".
    // "html_dir": "html",
    // The number of characters to retain in the commit hashes.
    // "hash_length": 8,
    // `asv` will cache wheels of the recent builds in each
    // environment, making them faster to install next time.  This is
    // the number of builds to keep, per environment.
    // "wheel_cache_size": 0
    // The commits after which the regression search in `asv publish`
    // should start looking for regressions. Dictionary whose keys are
    // regexps matching to benchmark names, and values corresponding to
    // the commit (exclusive) after which to start looking for
    // regressions.  The default is to start from the first commit
    // with results. If the commit is `null`, regression detection is
    // skipped for the matching benchmark.
    //
    // "regressions_first_commits": {
    //    "some_benchmark": "352cdf",  // Consider regressions only after this commit
    //    "another_benchmark": null,   // Skip regression detection altogether
    // }
    // The thresholds for relative change in results, after which `asv
    // publish` starts reporting regressions. Dictionary of the same
    // form as in ``regressions_first_commits``, with values
    // indicating the thresholds.  If multiple entries match, the
    // maximum is taken. If no entry matches, the default is 5%.
    //
    // "regressions_thresholds": {
    //    "some_benchmark": 0.01,     // Threshold of 1%
    //    "another_benchmark": 0.5,   // Threshold of 50%
    // }
 }
--- a/python/ray/rllib/tuned_examples/regression_tests/init.py
+++ b/python/ray/rllib/tuned_examples/regression_tests/init.py
--- a/python/ray/rllib/tuned_examples/regression_tests/regression_test.py
+++ b/python/ray/rllib/tuned_examples/regression_tests/regression_test.py
@ -0,0 +1,102 @@
 #!/usr/bin/env python
 """
 This module runs the regression YAMLs in the ASV benchmark format.
 """
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from collections import defaultdict
 import numpy as np
 import os
 import yaml
 import ray
 from ray import tune
 CONFIG_DIR = os.path.dirname(os.path.abspath(__file__))
 def _evaulate_config(filename):
    """Run the experiments described by a regression YAML and summarize them.

    Args:
        filename (str): Name of a YAML file, resolved relative to
            ``CONFIG_DIR`` (the directory containing this module).

    Returns:
        dict: Maps ``"time_total_s"``, ``"episode_reward_mean"`` and
            ``"training_iteration"`` to the median of that field taken
            over the last result of every trial.
    """
    with open(os.path.join(CONFIG_DIR, filename)) as f:
        # safe_load only builds plain Python objects. A bare yaml.load()
        # can construct arbitrary objects and newer PyYAML releases refuse
        # to run it without an explicit Loader.
        experiments = yaml.safe_load(f)
    ray.init()
    trials = tune.run_experiments(experiments)
    results = defaultdict(list)
    for t in trials:
        last = t.last_result
        results["time_total_s"].append(last.time_total_s)
        results["episode_reward_mean"].append(last.episode_reward_mean)
        results["training_iteration"].append(last.training_iteration)
    # One number per metric: the median across all trials.
    return {k: np.median(v) for k, v in results.items()}
 class Regression:
    """Base class for the regression benchmarks defined in this module.

    Subclasses supply ``setup_cache``; the ``track_*`` methods each read
    one entry out of the ``result`` mapping they are handed.
    """
    def setup_cache(self):
        """Must be overridden by every concrete benchmark class."""
        # Each subclass needs its own implementation so that ASV
        # registers the setup/class pair as a separate test.
        raise NotImplementedError()
    def teardown(self, *args):
        """Shut down the Ray worker; positional arguments are ignored."""
        ray.worker.cleanup()
    def track_time(self, result):
        """Return the ``"time_total_s"`` entry of ``result``."""
        metric = "time_total_s"
        return result[metric]
    def track_reward(self, result):
        """Return the ``"episode_reward_mean"`` entry of ``result``."""
        metric = "episode_reward_mean"
        return result[metric]
    def track_iterations(self, result):
        """Return the ``"training_iteration"`` entry of ``result``."""
        metric = "training_iteration"
        return result[metric]
 class TestCartPolePPO(Regression):
    """Regression benchmark driven by ``cartpole-ppo.yaml``."""
    # Experiment YAML handed to _evaulate_config.
    _file = "cartpole-ppo.yaml"
    def setup_cache(self):
        """Run the experiments in ``_file`` and return their summary."""
        summary = _evaulate_config(self._file)
        return summary
 class TestCartPolePG(Regression):
    """Regression benchmark driven by ``cartpole-pg.yaml``."""
    # Experiment YAML handed to _evaulate_config.
    _file = "cartpole-pg.yaml"
    def setup_cache(self):
        """Run the experiments in ``_file`` and return their summary."""
        summary = _evaulate_config(self._file)
        return summary
 class TestPendulumDDPG(Regression):
    """Regression benchmark driven by ``pendulum-ddpg.yaml``."""
    # Experiment YAML handed to _evaulate_config.
    _file = "pendulum-ddpg.yaml"
    def setup_cache(self):
        """Run the experiments in ``_file`` and return their summary."""
        summary = _evaulate_config(self._file)
        return summary
 class TestCartPoleES(Regression):
    """Regression benchmark driven by ``cartpole-es.yaml``."""
    # Experiment YAML handed to _evaulate_config.
    _file = "cartpole-es.yaml"
    def setup_cache(self):
        """Run the experiments in ``_file`` and return their summary."""
        summary = _evaulate_config(self._file)
        return summary
 class TestCartPoleDQN(Regression):
    """Regression benchmark driven by ``cartpole-dqn.yaml``."""
    # Experiment YAML handed to _evaulate_config.
    _file = "cartpole-dqn.yaml"
    def setup_cache(self):
        """Run the experiments in ``_file`` and return their summary."""
        summary = _evaulate_config(self._file)
        return summary
 class TestCartPoleA3C(Regression):
    """Regression benchmark driven by ``cartpole-a3c.yaml``."""
    # Experiment YAML handed to _evaulate_config.
    _file = "cartpole-a3c.yaml"
    def setup_cache(self):
        """Run the experiments in ``_file`` and return their summary."""
        summary = _evaulate_config(self._file)
        return summary
 class TestCartPoleA3CPyTorch(Regression):
    """Regression benchmark driven by ``cartpole-a3c-pytorch.yaml``."""
    # Experiment YAML handed to _evaulate_config.
    _file = "cartpole-a3c-pytorch.yaml"
    def setup_cache(self):
        """Run the experiments in ``_file`` and return their summary."""
        summary = _evaulate_config(self._file)
        return summary
--- a/test/jenkins_tests/run_rllib_asv.sh
+++ b/test/jenkins_tests/run_rllib_asv.sh
@ -0,0 +1,28 @@
 #!/usr/bin/env bash
 # Runs the RLlib ASV benchmarks for the commit recorded in /ray/git-rev and
 # uploads the results to S3.
 # Cause the script to exit if a single command fails.
 set -e
 # Show explicitly which commands are currently running.
 set -x
 BUCKET_NAME=ray-integration-testing/ASV
 COMMIT=$(cat /ray/git-rev)
 RLLIB_DIR=/ray/python/ray/rllib
 # Name of the results folder, used both locally and as the S3 prefix.
 RLLIB_RESULTS=RLLIB_RESULTS
 RLLIB_RESULTS_DIR=$RLLIB_DIR/$RLLIB_RESULTS
 pip install awscli
 # Install Ray fork of ASV
 # (a failed clone is tolerated, e.g. when /tmp/asv/ already exists).
 git clone https://github.com/ray-project/asv.git /tmp/asv/ || true
 cd /tmp/asv/
 pip install -e .
 cd "$RLLIB_DIR/"
 asv machine --machine jenkins
 # -p tolerates an existing directory but still fails on real errors.
 mkdir -p "$RLLIB_RESULTS_DIR"
 # Best effort: presumably no benchmarks.json exists in S3 before the first run.
 aws s3 cp "s3://$BUCKET_NAME/$RLLIB_RESULTS/benchmarks.json" "$RLLIB_RESULTS_DIR/benchmarks.json" || true
 asv run --show-stderr --python=same --force-record-commit="$COMMIT"
 # Keep a per-commit copy of benchmarks.json, then upload the whole results folder.
 aws s3 cp "$RLLIB_RESULTS_DIR/benchmarks.json" "s3://$BUCKET_NAME/$RLLIB_RESULTS/benchmarks_$COMMIT.json"
 aws s3 sync "$RLLIB_RESULTS_DIR/" "s3://$BUCKET_NAME/$RLLIB_RESULTS/"