# ray/rllib/tests/test_rollout.py
from gym.spaces import Box, Discrete
import os
from pathlib import Path
import re
import sys
import unittest
import ray
from ray import tune
from ray.rllib.examples.env.multi_agent import MultiAgentCartPole
from ray.rllib.utils.test_utils import framework_iterator
def rollout_test(algo, env="CartPole-v0", test_episode_rollout=False):
    """Train `algo` for one iteration via train.py, then test rollout.py.

    Runs rollout.py on the produced checkpoint with --steps=10 (and
    optionally --episodes=1) and verifies that the rollout output pickle
    files were written. Exits the process with code 1 on any failure.

    Args:
        algo (str): Name of the algorithm/Trainer to test (e.g. "PPO").
        env (str): The (gym-registered) env-ID to train/rollout on.
        test_episode_rollout (bool): If True, additionally test the
            `--episodes=1` rollout mode.
    """
    extra_config = ""
    # ARS/ES need a small noise table and tiny batches to finish quickly.
    if algo == "ARS":
        extra_config = ",\"train_batch_size\": 10, \"noise_size\": 250000"
    elif algo == "ES":
        extra_config = ",\"episodes_per_batch\": 1,\"train_batch_size\": 10, "\
            "\"noise_size\": 250000"

    for fw in framework_iterator(frameworks=("tf", "torch")):
        fw_ = ", \"framework\": \"{}\"".format(fw)

        # Create a temp dir in which to store checkpoints and results.
        tmp_dir = os.popen("mktemp -d").read()[:-1]
        if not os.path.exists(tmp_dir):
            sys.exit(1)
        print("Saving results to {}".format(tmp_dir))

        rllib_dir = str(Path(__file__).parent.parent.absolute())
        print("RLlib dir = {}\nexists={}".format(
            rllib_dir, os.path.exists(rllib_dir)))

        # Run a very short training session via train.py, producing exactly
        # one checkpoint (--checkpoint-freq=1, stop after 1 iteration).
        os.system("python {}/train.py --local-dir={} --run={} "
                  "--checkpoint-freq=1 ".format(rllib_dir, tmp_dir, algo) +
                  "--config='{" + "\"num_workers\": 1, \"num_gpus\": 0{}{}".
                  format(fw_, extra_config) +
                  ", \"timesteps_per_iteration\": 5,\"min_iter_time_s\": 0.1, "
                  "\"model\": {\"fcnet_hiddens\": [10]}"
                  "}' --stop='{\"training_iteration\": 1}'" +
                  " --env={}".format(env))

        # Locate the written checkpoint file.
        checkpoint_path = os.popen("ls {}/default/*/checkpoint_000001/"
                                   "checkpoint-1".format(tmp_dir)).read()[:-1]
        if not os.path.exists(checkpoint_path):
            sys.exit(1)
        print("Checkpoint path {} (exists)".format(checkpoint_path))

        # Test rolling out n steps.
        os.popen("python {}/rollout.py --run={} \"{}\" --steps=10 "
                 "--out=\"{}/rollouts_10steps.pkl\" --no-render".format(
                     rllib_dir, algo, checkpoint_path, tmp_dir)).read()
        if not os.path.exists(tmp_dir + "/rollouts_10steps.pkl"):
            sys.exit(1)
        print("rollout output (10 steps) exists!")

        # Test rolling out 1 episode.
        if test_episode_rollout:
            os.popen("python {}/rollout.py --run={} \"{}\" --episodes=1 "
                     "--out=\"{}/rollouts_1episode.pkl\" --no-render".format(
                         rllib_dir, algo, checkpoint_path, tmp_dir)).read()
            if not os.path.exists(tmp_dir + "/rollouts_1episode.pkl"):
                sys.exit(1)
            print("rollout output (1 ep) exists!")

        # Cleanup.
        os.popen("rm -rf \"{}\"".format(tmp_dir)).read()
def learn_test_plus_rollout(algo, env="CartPole-v0"):
    """Train `algo` via train.py until reward 150, then check rollout reward.

    Picks the last (best) checkpoint, rolls it out for 400 steps via
    rollout.py, and asserts the rollout's mean episode reward is also
    >= 150.0. Exits the process with code 1 on setup failures.

    Args:
        algo (str): Name of the algorithm/Trainer to test.
        env (str): The (gym-registered) env-ID to train/rollout on.
    """
    for fw in framework_iterator(frameworks=("tf", "torch")):
        fw_ = ", \\\"framework\\\": \\\"{}\\\"".format(fw)

        tmp_dir = os.popen("mktemp -d").read()[:-1]
        if not os.path.exists(tmp_dir):
            # Last resort: Resolve via underlying tempdir (and cut tmp_.
            tmp_dir = ray._private.utils.tempfile.gettempdir() + tmp_dir[4:]
            if not os.path.exists(tmp_dir):
                sys.exit(1)
        print("Saving results to {}".format(tmp_dir))

        rllib_dir = str(Path(__file__).parent.parent.absolute())
        print("RLlib dir = {}\nexists={}".format(
            rllib_dir, os.path.exists(rllib_dir)))

        # Train until the stopping reward is reached, checkpointing every
        # iteration and once more at the end.
        os.system("python {}/train.py --local-dir={} --run={} "
                  "--checkpoint-freq=1 --checkpoint-at-end ".format(
                      rllib_dir, tmp_dir, algo) +
                  "--config=\"{\\\"num_gpus\\\": 0, \\\"num_workers\\\": 1, "
                  "\\\"evaluation_config\\\": {\\\"explore\\\": false}" + fw_ +
                  "}\" " + "--stop=\"{\\\"episode_reward_mean\\\": 150.0}\"" +
                  " --env={}".format(env))

        # Find last checkpoint and use that for the rollout.
        checkpoint_path = os.popen("ls {}/default/*/checkpoint_*/"
                                   "checkpoint-*".format(tmp_dir)).read()[:-1]
        checkpoints = [
            cp for cp in checkpoint_path.split("\n")
            if re.match(r"^.+checkpoint-\d+$", cp)
        ]
        # Sort by number and pick last (which should be the best checkpoint).
        last_checkpoint = sorted(
            checkpoints,
            key=lambda x: int(re.match(r".+checkpoint-(\d+)", x).group(1)))[-1]
        assert re.match(r"^.+checkpoint_\d+/checkpoint-\d+$", last_checkpoint)
        if not os.path.exists(last_checkpoint):
            sys.exit(1)
        print("Best checkpoint={} (exists)".format(last_checkpoint))

        # Test rolling out n steps.
        result = os.popen(
            "python {}/rollout.py --run={} "
            "--steps=400 "
            "--out=\"{}/rollouts_n_steps.pkl\" --no-render \"{}\"".format(
                rllib_dir, algo, tmp_dir, last_checkpoint)).read()[:-1]
        if not os.path.exists(tmp_dir + "/rollouts_n_steps.pkl"):
            sys.exit(1)
        print("Rollout output exists -> Checking reward ...")

        # Parse the per-episode rewards from rollout.py's stdout.
        episodes = result.split("\n")
        mean_reward = 0.0
        num_episodes = 0
        for ep in episodes:
            mo = re.match(r"Episode .+reward: ([\d\.\-]+)", ep)
            if mo:
                mean_reward += float(mo.group(1))
                num_episodes += 1
        # Fail clearly (instead of ZeroDivisionError) if no episode line
        # could be parsed from the rollout output.
        assert num_episodes > 0, "No episodes found in rollout output!"
        mean_reward /= num_episodes
        print("Rollout's mean episode reward={}".format(mean_reward))
        assert mean_reward >= 150.0

        # Cleanup.
        os.popen("rm -rf \"{}\"".format(tmp_dir)).read()
def learn_test_multi_agent_plus_rollout(algo):
    """Train `algo` on multi-agent CartPole via tune, then check rollout.

    Trains two independent policies ("pol0"/"pol1") until an episode reward
    mean of 150.0 is reached, rolls out the last checkpoint for 400 steps
    via rollout.py, and asserts the rollout's mean episode reward is also
    >= 150.0. Exits the process with code 1 on setup failures.

    Args:
        algo (str): Name of the algorithm/Trainer to test (e.g. "PPO").
    """
    for fw in framework_iterator(frameworks=("tf", "torch")):
        tmp_dir = os.popen("mktemp -d").read()[:-1]
        if not os.path.exists(tmp_dir):
            # Last resort: Resolve via underlying tempdir (and cut tmp_.
            tmp_dir = ray._private.utils.tempfile.gettempdir() + tmp_dir[4:]
            if not os.path.exists(tmp_dir):
                sys.exit(1)
        print("Saving results to {}".format(tmp_dir))

        rllib_dir = str(Path(__file__).parent.parent.absolute())
        print("RLlib dir = {}\nexists={}".format(
            rllib_dir, os.path.exists(rllib_dir)))

        def policy_fn(agent):
            # Map agent-ID (int) -> policy-ID ("pol0"/"pol1").
            return "pol{}".format(agent)

        observation_space = Box(float("-inf"), float("inf"), (4, ))
        action_space = Discrete(2)

        config = {
            "num_gpus": 0,
            "num_workers": 1,
            "evaluation_config": {
                "explore": False
            },
            "framework": fw,
            "env": MultiAgentCartPole,
            "multiagent": {
                "policies": {
                    "pol0": (None, observation_space, action_space, {}),
                    "pol1": (None, observation_space, action_space, {}),
                },
                "policy_mapping_fn": policy_fn,
            },
        }
        stop = {"episode_reward_mean": 150.0}

        tune.run(
            algo,
            config=config,
            stop=stop,
            checkpoint_freq=1,
            checkpoint_at_end=True,
            local_dir=tmp_dir,
            verbose=1)

        # Find last checkpoint and use that for the rollout.
        # Note: The experiment dir is named after `algo` (was hardcoded to
        # "PPO" before, which broke this test for any other algo).
        checkpoint_path = os.popen("ls {}/{}/*/checkpoint_*/"
                                   "checkpoint-*".format(
                                       tmp_dir, algo)).read()[:-1]
        checkpoint_paths = checkpoint_path.split("\n")
        assert len(checkpoint_paths) > 0
        checkpoints = [
            cp for cp in checkpoint_paths
            if re.match(r"^.+checkpoint-\d+$", cp)
        ]
        # Sort by number and pick last (which should be the best checkpoint).
        last_checkpoint = sorted(
            checkpoints,
            key=lambda x: int(re.match(r".+checkpoint-(\d+)", x).group(1)))[-1]
        assert re.match(r"^.+checkpoint_\d+/checkpoint-\d+$", last_checkpoint)
        if not os.path.exists(last_checkpoint):
            sys.exit(1)
        print("Best checkpoint={} (exists)".format(last_checkpoint))

        # Shut down ray before spawning the rollout subprocess.
        ray.shutdown()

        # Test rolling out n steps.
        result = os.popen(
            "python {}/rollout.py --run={} "
            "--steps=400 "
            "--out=\"{}/rollouts_n_steps.pkl\" --no-render \"{}\"".format(
                rllib_dir, algo, tmp_dir, last_checkpoint)).read()[:-1]
        if not os.path.exists(tmp_dir + "/rollouts_n_steps.pkl"):
            sys.exit(1)
        print("Rollout output exists -> Checking reward ...")

        # Parse the per-episode rewards from rollout.py's stdout.
        episodes = result.split("\n")
        mean_reward = 0.0
        num_episodes = 0
        for ep in episodes:
            mo = re.match(r"Episode .+reward: ([\d\.\-]+)", ep)
            if mo:
                mean_reward += float(mo.group(1))
                num_episodes += 1
        # Fail clearly (instead of ZeroDivisionError) if no episode line
        # could be parsed from the rollout output.
        assert num_episodes > 0, "No episodes found in rollout output!"
        mean_reward /= num_episodes
        print("Rollout's mean episode reward={}".format(mean_reward))
        assert mean_reward >= 150.0

        # Cleanup.
        os.popen("rm -rf \"{}\"".format(tmp_dir)).read()
class TestRolloutSimple1(unittest.TestCase):
    """Quick (1-iteration) train + rollout smoke tests: A3C and DDPG."""

    def test_a3c(self):
        rollout_test("A3C")

    def test_ddpg(self):
        # DDPG requires a continuous action space -> use Pendulum.
        rollout_test("DDPG", env="Pendulum-v0")
class TestRolloutSimple2(unittest.TestCase):
    """Quick (1-iteration) train + rollout smoke tests: DQN and ES."""

    def test_dqn(self):
        rollout_test("DQN")

    def test_es(self):
        rollout_test("ES")
class TestRolloutSimple3(unittest.TestCase):
    """Quick (1-iteration) train + rollout smoke tests: IMPALA and PPO."""

    def test_impala(self):
        rollout_test("IMPALA", env="CartPole-v0")

    def test_ppo(self):
        # PPO additionally exercises the `--episodes=1` rollout mode.
        rollout_test("PPO", env="CartPole-v0", test_episode_rollout=True)
class TestRolloutSimple4(unittest.TestCase):
    """Quick (1-iteration) train + rollout smoke test: SAC."""

    def test_sac(self):
        # SAC is tested on a continuous action space -> use Pendulum.
        rollout_test("SAC", env="Pendulum-v0")
class TestRolloutLearntPolicy(unittest.TestCase):
    """Full learning tests (train to reward 150, then verify rollout reward)."""

    def test_ppo_train_then_rollout(self):
        learn_test_plus_rollout("PPO")

    def test_ppo_multi_agent_train_then_rollout(self):
        learn_test_multi_agent_plus_rollout("PPO")
if __name__ == "__main__":
    import pytest

    # A single TestCase class may be selected via the first CLI argument
    # (e.g. `python test_rollout.py TestRolloutSimple1`); without one, all
    # unittest.TestCase classes in this file are run.
    target = __file__
    if len(sys.argv) > 1:
        target += "::" + sys.argv[1]
    sys.exit(pytest.main(["-v", target]))