# ---------------
# IMPORTANT NOTE:
# ---------------
# A recent bug in OpenAI gym prevents RLlib's "record_env" option
# from recording videos properly. Instead, the produced mp4 files
# have a size of 1kb and are corrupted.
# A simple fix for this is described here:
# https://github.com/openai/gym/issues/1925

import argparse

import gym
import numpy as np
import ray
from gym.spaces import Box, Discrete

from ray import tune
from ray.rllib.env.multi_agent_env import make_multi_agent

parser = argparse.ArgumentParser()
parser.add_argument(
    "--framework",
    choices=["tf", "tf2", "tfe", "torch"],
    default="tf",
    help="The DL framework specifier.",
)
parser.add_argument("--multi-agent", action="store_true")
parser.add_argument("--stop-iters", type=int, default=10)
parser.add_argument("--stop-timesteps", type=int, default=10000)
parser.add_argument("--stop-reward", type=float, default=9.0)

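# Example invocations (the script file name used below is assumed; adjust it
# to wherever you saved this example):
#   python env_rendering_and_recording.py --framework=torch
#   python env_rendering_and_recording.py --multi-agent --stop-iters=5
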
class CustomRenderedEnv(gym.Env):
    """Example of a custom env, for which you can specify rendering behavior."""

    # Must specify which render modes are supported by your custom env.
    # For RLlib to render your env via the "render_env" config key, only
    # mode="rgb_array" is needed. RLlib will automatically produce a simple
    # viewer for the returned RGB images for mode="human", such that you don't
    # have to provide your own window+render handling.
    metadata = {
        "render.modes": ["rgb_array"],
    }

    def __init__(self, config):
        self.end_pos = config.get("corridor_length", 10)
        self.max_steps = config.get("max_steps", 100)
        self.cur_pos = 0
        self.steps = 0
        self.action_space = Discrete(2)
        self.observation_space = Box(0.0, 999.0, shape=(1,), dtype=np.float32)

    def reset(self):
        self.cur_pos = 0.0
        self.steps = 0
        return [self.cur_pos]

    def step(self, action):
        self.steps += 1
        assert action in [0, 1], action
        if action == 0 and self.cur_pos > 0:
            self.cur_pos -= 1.0
        elif action == 1:
            self.cur_pos += 1.0
        done = self.cur_pos >= self.end_pos or self.steps >= self.max_steps
        return [self.cur_pos], 10.0 if done else -0.1, done, {}

    def render(self, mode="rgb"):
        """Implements rendering logic for this env (given its current state).

        You can either return an RGB image
        (np.array([height, width, 3], dtype=np.uint8)) or take care of
        rendering in a window yourself here (and return True in that case).
        For RLlib, though, only mode=rgb (returning an image) is needed,
        even when "render_env" is True in the RLlib config.

        Args:
            mode (str): One of "rgb", "human", or "ascii". See gym.Env for
                more information.

        Returns:
            Union[np.ndarray, bool]: An image to render or True (if rendering
                is handled entirely in here).
        """
        # Just generate a random image here for demonstration purposes.
        # Also see `gym/envs/classic_control/cartpole.py` for
        # an example on how to use a Viewer object.
        return np.random.randint(0, 256, size=(300, 400, 3), dtype=np.uint8)

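# A minimal sketch of a more informative render function than the random image
# above: it draws the agent's current position as a white bar on a black
# background. The helper name `render_corridor_frame` is hypothetical (not part
# of the original example); to get meaningful videos, you could plug its body
# into `CustomRenderedEnv.render`, passing `self.cur_pos` and `self.end_pos`.
def render_corridor_frame(cur_pos, end_pos, height=300, width=400):
    # Start from an all-black RGB frame.
    frame = np.zeros((height, width, 3), dtype=np.uint8)
    # Map the agent's position in [0, end_pos] to a horizontal pixel column.
    col = int(min(max(cur_pos, 0.0), end_pos) / end_pos * (width - 1))
    # Draw a ~5px wide vertical white bar at that column.
    frame[:, max(col - 2, 0): col + 3, :] = 255
    return frame
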
MultiAgentCustomRenderedEnv = make_multi_agent(lambda config: CustomRenderedEnv(config))
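# Note on the wrapper above: `make_multi_agent` turns the single-agent env into
# a MultiAgentEnv that holds several independent copies of it. In the RLlib
# versions this example targets, the number of copies can typically be set via
# a "num_agents" key in the env_config (defaulting to 1); treat that key name
# as an assumption and verify it against your RLlib version before relying on it.
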
if __name__ == "__main__":
    # Note: Recording and rendering in this example
    # should work for both local_mode=True|False.
    ray.init(num_cpus=4)
    args = parser.parse_args()

    # Example config that renders and records the env during evaluation.
    config = {
        # Also try common gym envs like: "CartPole-v0" or "Pendulum-v1".
        "env": (MultiAgentCustomRenderedEnv if args.multi_agent else CustomRenderedEnv),
        "env_config": {
            "corridor_length": 10,
            "max_steps": 100,
        },
        # Evaluate once per training iteration.
        "evaluation_interval": 1,
        # Run evaluation on (at least) two episodes ...
        "evaluation_duration": 2,
        # ... using one evaluation worker (setting this to 0 will cause
        # evaluation to run on the local evaluation worker, blocking
        # training until evaluation is done).
        "evaluation_num_workers": 1,
        # Special evaluation config. Keys specified here will override
        # the same keys in the main config, but only for evaluation.
        "evaluation_config": {
            # Store videos in this relative directory inside the default
            # output dir (~/ray_results/...).
            # Alternatively, you can specify an absolute path
            # (see the commented-out line below).
            # Set to True to use the default output dir (~/ray_results/...).
            # Set to False to not record anything.
            "record_env": "videos",
            # "record_env": "/Users/xyz/my_videos/",
            # Render the env while evaluating.
            # Note that this will always only render the 1st RolloutWorker's
            # env and only the 1st sub-env in a vectorized env.
            "render_env": True,
        },
        "num_workers": 1,
        # Use a vectorized env with 2 sub-envs.
        "num_envs_per_worker": 2,
        "framework": args.framework,
    }

    stop = {
        "training_iteration": args.stop_iters,
        "timesteps_total": args.stop_timesteps,
        "episode_reward_mean": args.stop_reward,
    }

    results = tune.run("PPO", config=config, stop=stop)
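
    # Optional follow-up, not part of the original script: print the best
    # trial's final mean reward and release Ray's resources. `get_best_trial`
    # belongs to the ExperimentAnalysis object returned by `tune.run`; if your
    # Ray version differs, inspect `results` for the equivalent accessor.
    best_trial = results.get_best_trial("episode_reward_mean", mode="max")
    if best_trial is not None:
        print("Best mean reward:", best_trial.last_result["episode_reward_mean"])
    ray.shutdown()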