ray/rllib/examples/custom_env.py

"""Example of a custom gym environment and model. Run this for a demo.

This example shows:
  - using a custom environment
  - using a custom model
  - using Tune for grid search

You can visualize experiment results in ~/ray_results using TensorBoard.
"""
import argparse
import gym
from gym.spaces import Discrete, Box
import numpy as np
import os

import ray
from ray import tune
from ray.tune import grid_search
from ray.rllib.models import ModelCatalog
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.tf.fcnet import FullyConnectedNetwork
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.test_utils import check_learning_achieved

# Obtain the deep-learning framework handles through RLlib's helper functions
# instead of plain `import` statements. `nn` is used below as the namespace
# providing `nn.Module` for the torch model.
# NOTE(review): presumably these helpers tolerate a missing framework and
# return placeholders in that case - confirm in ray.rllib.utils.framework.
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()

# Command-line interface of this example script. Every flag carries a help
# text so that `--help` explains how the value is used further below.
parser = argparse.ArgumentParser(
    description="Train on the SimpleCorridor custom env with a custom model, "
    "grid-searching over several learning rates via Tune.")
parser.add_argument(
    "--run",
    type=str,
    default="PPO",
    help="Name of the algorithm passed to `tune.run()` "
    "(default: %(default)s).")
parser.add_argument(
    "--torch",
    action="store_true",
    help="Register the PyTorch custom model and set the framework to "
    "'torch' (otherwise the TF model and 'tf' are used).")
parser.add_argument(
    "--as-test",
    action="store_true",
    help="After the run, call `check_learning_achieved()` with the results "
    "and the value of --stop-reward.")
parser.add_argument(
    "--stop-iters",
    type=int,
    default=50,
    help="Stop criterion for `training_iteration` (default: %(default)s).")
parser.add_argument(
    "--stop-timesteps",
    type=int,
    default=100000,
    help="Stop criterion for `timesteps_total` (default: %(default)s).")
parser.add_argument(
    "--stop-reward",
    type=float,
    default=0.1,
    help="Stop criterion for `episode_reward_mean` (default: %(default)s).")


class SimpleCorridor(gym.Env):
    """Custom env: a one-dimensional corridor the agent has to walk down.

    The agent starts at position 0. Action 1 moves it one step forward,
    action 0 moves it one step back (but never below position 0). The
    corridor length is read from the env config.
    """

    def __init__(self, config):
        """Set up the goal position and the action/observation spaces.

        Args:
            config: Env config; the key "corridor_length" is required.
        """
        corridor_length = config["corridor_length"]
        self.end_pos = corridor_length
        self.cur_pos = 0
        # Two discrete actions: 0 (step back) and 1 (step forward).
        self.action_space = Discrete(2)
        # A single float bounded by 0.0 and the corridor length.
        self.observation_space = Box(
            0.0, corridor_length, shape=(1, ), dtype=np.float32)

    def reset(self):
        """Put the agent back at the start; return the initial observation."""
        start_pos = 0
        self.cur_pos = start_pos
        return [start_pos]

    def step(self, action):
        """Apply `action` and return `(obs, reward, done, info)`.

        The reward is 1.0 on the step that reaches (or exceeds) the end
        position and -0.1 on every other step. `info` is always empty.
        """
        assert action in [0, 1], action
        if action == 1:
            self.cur_pos += 1
        elif action == 0 and self.cur_pos > 0:
            # Moving back is only possible while away from the start.
            self.cur_pos -= 1
        reached_end = self.cur_pos >= self.end_pos
        if reached_end:
            reward = 1.0
        else:
            reward = -0.1
        return [self.cur_pos], reward, reached_end, {}


class CustomModel(TFModelV2):
    """TF custom model that wraps a fully connected net and defers to it."""

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Initialize the wrapper and build the wrapped fc-net.

        Both the base class and the wrapped network receive exactly the
        same constructor arguments.
        """
        ctor_args = (obs_space, action_space, num_outputs, model_config, name)
        super().__init__(*ctor_args)
        self.model = FullyConnectedNetwork(*ctor_args)

    def forward(self, input_dict, state, seq_lens):
        """Return whatever the wrapped network's `forward()` returns."""
        wrapped_forward = self.model.forward
        return wrapped_forward(input_dict, state, seq_lens)

    def value_function(self):
        """Return whatever the wrapped network's `value_function()` returns."""
        wrapped_value_fn = self.model.value_function
        return wrapped_value_fn()


class TorchCustomModel(TorchModelV2, nn.Module):
    """PyTorch custom model that wraps a fully connected net and defers to it."""

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Initialize both base classes and build the wrapped fc-net.

        `TorchModelV2` and the wrapped network receive exactly the same
        constructor arguments.
        """
        ctor_args = (obs_space, action_space, num_outputs, model_config, name)
        TorchModelV2.__init__(self, *ctor_args)
        nn.Module.__init__(self)

        self.torch_sub_model = TorchFC(*ctor_args)

    def forward(self, input_dict, state, seq_lens):
        """Run the wrapped network on float observations.

        Note that `input_dict["obs"]` is replaced in place by its float
        version. The state returned by the wrapped network is discarded and
        an empty state list is returned instead.
        """
        float_obs = input_dict["obs"].float()
        input_dict["obs"] = float_obs
        sub_model_out, _ = self.torch_sub_model(input_dict, state, seq_lens)
        return sub_model_out, []

    def value_function(self):
        """Return the wrapped network's value output flattened to 1-D."""
        values = self.torch_sub_model.value_function()
        return torch.reshape(values, [-1])


if __name__ == "__main__":
    args = parser.parse_args()
    ray.init()

    # Pick the custom model class and framework string matching `--torch`.
    if args.torch:
        model_cls, framework = TorchCustomModel, "torch"
    else:
        model_cls, framework = CustomModel, "tf"

    # The env class is passed directly in the config below. Alternatively,
    # an env creator function can be registered explicitly with:
    # register_env("corridor", lambda config: SimpleCorridor(config))
    ModelCatalog.register_custom_model("my_model", model_cls)

    env_config = {
        "corridor_length": 5,
    }
    # Use GPUs iff the `RLLIB_NUM_GPUS` env var is set to > 0.
    num_gpus = int(os.environ.get("RLLIB_NUM_GPUS", "0"))
    model_config = {
        "custom_model": "my_model",
    }
    # Tune tries each of these learning rates.
    learning_rates = grid_search([1e-2, 1e-4, 1e-6])

    config = {
        "env": SimpleCorridor,  # or "corridor" if registered as shown above
        "env_config": env_config,
        "num_gpus": num_gpus,
        "model": model_config,
        "vf_share_layers": True,
        "lr": learning_rates,
        "num_workers": 1,  # parallelism
        "framework": framework,
    }

    # Stop criteria, taken from the command line.
    stop = {
        "training_iteration": args.stop_iters,
        "timesteps_total": args.stop_timesteps,
        "episode_reward_mean": args.stop_reward,
    }

    results = tune.run(args.run, config=config, stop=stop)

    # In test mode, check the results against the requested reward.
    if args.as_test:
        check_learning_achieved(results, args.stop_reward)
    ray.shutdown()
[rllib] Some API cleanups and documentation improvements (#4409) 2019-03-21 21:34:22 -07:00			`"""Example of a custom gym environment and model. Run this for a demo.`
[rllib] Add examples page, add hierarchical training example, delete SC2 examples (#3815) * wip * lint * wip * up * wip * update examples * wip * remove carla * update * improve envspec * link to custom * Update rllib-env.rst * update * fix * fn * lint * ds * ssd games * desc * fix up docs * fix 2019-01-29 21:06:09 -08:00
			`This example shows:`
			`- using a custom environment`
[rllib] Some API cleanups and documentation improvements (#4409) 2019-03-21 21:34:22 -07:00			`- using a custom model`
[rllib] Add examples page, add hierarchical training example, delete SC2 examples (#3815) * wip * lint * wip * up * wip * update examples * wip * remove carla * update * improve envspec * link to custom * Update rllib-env.rst * update * fix * fn * lint * ds * ssd games * desc * fix up docs * fix 2019-01-29 21:06:09 -08:00			`- using Tune for grid search`

			`You can visualize experiment results in ~/ray_results using TensorBoard.`
			`"""`
[RLlib] Examples folder restructuring (Model examples; final part). (#8278) - This PR completes any previously missing PyTorch Model counterparts to TFModels in examples/models. - It also makes sure, all example scripts in the rllib/examples folder are tested for both frameworks and learn the given task (this is often currently not checked) using a --as-test flag in connection with a --stop-reward. 2020-05-12 08:23:10 +02:00			`import argparse`
[rllib] improve custom env docs (#1447) * env docs * add env * update env * Fri Jan 19 18:55:34 PST 2018 2018-01-19 21:36:18 -08:00			`import gym`
			`from gym.spaces import Discrete, Box`
[RLlib] Attention Net integration into ModelV2 and learning RL example. (#8371) 2020-05-18 17:26:40 +02:00			`import numpy as np`
[RLlib] Fix all example scripts to run on GPUs. (#11105) 2020-10-02 23:07:44 +02:00			`import os`
[rllib] improve custom env docs (#1447) * env docs * add env * update env * Fri Jan 19 18:55:34 PST 2018 2018-01-19 21:36:18 -08:00
[tune] Ray Tune API cleanup (#1454) Remove rllib dep: trainable is now a standalone abstract class that can be easily subclassed. Clean up hyperband: fix debug string and add an example. Remove YAML api / ScriptRunner: this was never really used. Move ray.init() out of run_experiments(): This provides greater flexibility and should be less confusing since there isn't an implicit init() done there. Note that this is a breaking API change for tune. 2018-01-24 16:55:17 -08:00			`import ray`
[rllib] Switch to tune.run() instead of run_experiments() (#4515) 2019-03-30 14:07:50 -07:00			`from ray import tune`
			`from ray.tune import grid_search`
[RLlib] Examples folder restructuring (Model examples; final part). (#8278) - This PR completes any previously missing PyTorch Model counterparts to TFModels in examples/models. - It also makes sure, all example scripts in the rllib/examples folder are tested for both frameworks and learn the given task (this is often currently not checked) using a --as-test flag in connection with a --stop-reward. 2020-05-12 08:23:10 +02:00			`from ray.rllib.models import ModelCatalog`
			`from ray.rllib.models.tf.tf_modelv2 import TFModelV2`
[RLlib] Attention Net integration into ModelV2 and learning RL example. (#8371) 2020-05-18 17:26:40 +02:00			`from ray.rllib.models.tf.fcnet import FullyConnectedNetwork`
[RLlib] Examples folder restructuring (Model examples; final part). (#8278) - This PR completes any previously missing PyTorch Model counterparts to TFModels in examples/models. - It also makes sure, all example scripts in the rllib/examples folder are tested for both frameworks and learn the given task (this is often currently not checked) using a --as-test flag in connection with a --stop-reward. 2020-05-12 08:23:10 +02:00			`from ray.rllib.models.torch.torch_modelv2 import TorchModelV2`
			`from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC`
			`from ray.rllib.utils.framework import try_import_tf, try_import_torch`
			`from ray.rllib.utils.test_utils import check_learning_achieved`
[rllib] improve custom env docs (#1447) * env docs * add env * update env * Fri Jan 19 18:55:34 PST 2018 2018-01-19 21:36:18 -08:00
[RLlib] Tf2x preparation; part 2 (upgrading `try_import_tf()`). (#9136) * WIP. * Fixes. * LINT. * WIP. * WIP. * Fixes. * Fixes. * Fixes. * Fixes. * WIP. * Fixes. * Test * Fix. * Fixes and LINT. * Fixes and LINT. * LINT. 2020-06-30 10:13:20 +02:00			`tf1, tf, tfv = try_import_tf()`
[RLlib] Examples folder restructuring (Model examples; final part). (#8278) - This PR completes any previously missing PyTorch Model counterparts to TFModels in examples/models. - It also makes sure, all example scripts in the rllib/examples folder are tested for both frameworks and learn the given task (this is often currently not checked) using a --as-test flag in connection with a --stop-reward. 2020-05-12 08:23:10 +02:00			`torch, nn = try_import_torch()`

			`parser = argparse.ArgumentParser()`
			`parser.add_argument("--run", type=str, default="PPO")`
			`parser.add_argument("--torch", action="store_true")`
			`parser.add_argument("--as-test", action="store_true")`
			`parser.add_argument("--stop-iters", type=int, default=50)`
			`parser.add_argument("--stop-timesteps", type=int, default=100000)`
			`parser.add_argument("--stop-reward", type=float, default=0.1)`
[rllib] Document ModelV2 and clean up the models/ directory (#5277) 2019-07-27 02:08:16 -07:00
[rllib] improve custom env docs (#1447) * env docs * add env * update env * Fri Jan 19 18:55:34 PST 2018 2018-01-19 21:36:18 -08:00
			`class SimpleCorridor(gym.Env):`
			`"""Example of a custom env in which you have to walk down a corridor.`

			`You can configure the length of the corridor via the env config."""`

			`def __init__(self, config):`
			`self.end_pos = config["corridor_length"]`
			`self.cur_pos = 0`
			`self.action_space = Discrete(2)`
[rllib] Upgrade to OpenAI Gym 0.10.3 (#1601) 2018-03-06 08:31:02 +00:00			`self.observation_space = Box(`
[rllib] Propagate model options correctly in ARS / ES, to action dist of PPO (#2974) * fix * fix * fix it * propagate conf to action dist * move carla example too * rr * Update policies.py * wip * lint 2018-10-01 12:49:39 -07:00			`0.0, self.end_pos, shape=(1, ), dtype=np.float32)`
[rllib] improve custom env docs (#1447) * env docs * add env * update env * Fri Jan 19 18:55:34 PST 2018 2018-01-19 21:36:18 -08:00
updates (#1896) 2018-04-13 00:57:00 -07:00			`def reset(self):`
[rllib] improve custom env docs (#1447) * env docs * add env * update env * Fri Jan 19 18:55:34 PST 2018 2018-01-19 21:36:18 -08:00			`self.cur_pos = 0`
			`return [self.cur_pos]`

updates (#1896) 2018-04-13 00:57:00 -07:00			`def step(self, action):`
[rllib] Propagate model options correctly in ARS / ES, to action dist of PPO (#2974) * fix * fix * fix it * propagate conf to action dist * move carla example too * rr * Update policies.py * wip * lint 2018-10-01 12:49:39 -07:00			`assert action in [0, 1], action`
[rllib] improve custom env docs (#1447) * env docs * add env * update env * Fri Jan 19 18:55:34 PST 2018 2018-01-19 21:36:18 -08:00			`if action == 0 and self.cur_pos > 0:`
			`self.cur_pos -= 1`
			`elif action == 1:`
			`self.cur_pos += 1`
			`done = self.cur_pos >= self.end_pos`
[RLlib] Examples folder restructuring (Model examples; final part). (#8278) - This PR completes any previously missing PyTorch Model counterparts to TFModels in examples/models. - It also makes sure, all example scripts in the rllib/examples folder are tested for both frameworks and learn the given task (this is often currently not checked) using a --as-test flag in connection with a --stop-reward. 2020-05-12 08:23:10 +02:00			`return [self.cur_pos], 1.0 if done else -0.1, done, {}`
[rllib] improve custom env docs (#1447) * env docs * add env * update env * Fri Jan 19 18:55:34 PST 2018 2018-01-19 21:36:18 -08:00

[rllib] Document ModelV2 and clean up the models/ directory (#5277) 2019-07-27 02:08:16 -07:00			`class CustomModel(TFModelV2):`
[RLlib] Examples folder restructuring (Model examples; final part). (#8278) - This PR completes any previously missing PyTorch Model counterparts to TFModels in examples/models. - It also makes sure, all example scripts in the rllib/examples folder are tested for both frameworks and learn the given task (this is often currently not checked) using a --as-test flag in connection with a --stop-reward. 2020-05-12 08:23:10 +02:00			`"""Example of a keras custom model that just delegates to an fc-net."""`
[rllib] Document ModelV2 and clean up the models/ directory (#5277) 2019-07-27 02:08:16 -07:00
			`def __init__(self, obs_space, action_space, num_outputs, model_config,`
			`name):`
			`super(CustomModel, self).__init__(obs_space, action_space, num_outputs,`
			`model_config, name)`
			`self.model = FullyConnectedNetwork(obs_space, action_space,`
			`num_outputs, model_config, name)`
[rllib] Some API cleanups and documentation improvements (#4409) 2019-03-21 21:34:22 -07:00
[rllib] Document ModelV2 and clean up the models/ directory (#5277) 2019-07-27 02:08:16 -07:00			`def forward(self, input_dict, state, seq_lens):`
			`return self.model.forward(input_dict, state, seq_lens)`
[rllib] Some API cleanups and documentation improvements (#4409) 2019-03-21 21:34:22 -07:00
[rllib] Document ModelV2 and clean up the models/ directory (#5277) 2019-07-27 02:08:16 -07:00			`def value_function(self):`
			`return self.model.value_function()`
[rllib] Some API cleanups and documentation improvements (#4409) 2019-03-21 21:34:22 -07:00

[RLlib] Examples folder restructuring (Model examples; final part). (#8278) - This PR completes any previously missing PyTorch Model counterparts to TFModels in examples/models. - It also makes sure, all example scripts in the rllib/examples folder are tested for both frameworks and learn the given task (this is often currently not checked) using a --as-test flag in connection with a --stop-reward. 2020-05-12 08:23:10 +02:00			`class TorchCustomModel(TorchModelV2, nn.Module):`
			`"""Example of a PyTorch custom model that just delegates to a fc-net."""`

			`def __init__(self, obs_space, action_space, num_outputs, model_config,`
			`name):`
			`TorchModelV2.__init__(self, obs_space, action_space, num_outputs,`
			`model_config, name)`
			`nn.Module.__init__(self)`

			`self.torch_sub_model = TorchFC(obs_space, action_space, num_outputs,`
			`model_config, name)`

			`def forward(self, input_dict, state, seq_lens):`
			`input_dict["obs"] = input_dict["obs"].float()`
			`fc_out, _ = self.torch_sub_model(input_dict, state, seq_lens)`
			`return fc_out, []`

			`def value_function(self):`
			`return torch.reshape(self.torch_sub_model.value_function(), [-1])`


[rllib] improve custom env docs (#1447) * env docs * add env * update env * Fri Jan 19 18:55:34 PST 2018 2018-01-19 21:36:18 -08:00			`if __name__ == "__main__":`
[RLlib] Examples folder restructuring (Model examples; final part). (#8278) - This PR completes any previously missing PyTorch Model counterparts to TFModels in examples/models. - It also makes sure, all example scripts in the rllib/examples folder are tested for both frameworks and learn the given task (this is often currently not checked) using a --as-test flag in connection with a --stop-reward. 2020-05-12 08:23:10 +02:00			`args = parser.parse_args()`
			`ray.init()`

[rllib] Allow envs to be auto-registered; add on_train_result callback with curriculum example (#3451) * train step and docs * debug * doc * doc * fix examples * fix code * integration test * fix * ... * space * instance * Update .travis.yml * fix test 2018-12-03 23:15:43 -08:00			`# Can also register the env creator function explicitly with:`
			`# register_env("corridor", lambda config: SimpleCorridor(config))`
[RLlib] Examples folder restructuring (Model examples; final part). (#8278) - This PR completes any previously missing PyTorch Model counterparts to TFModels in examples/models. - It also makes sure, all example scripts in the rllib/examples folder are tested for both frameworks and learn the given task (this is often currently not checked) using a --as-test flag in connection with a --stop-reward. 2020-05-12 08:23:10 +02:00			`ModelCatalog.register_custom_model(`
			`"my_model", TorchCustomModel if args.torch else CustomModel)`

			`config = {`
			`"env": SimpleCorridor, # or "corridor" if registered above`
			`"env_config": {`
			`"corridor_length": 5,`
[rllib] Switch to tune.run() instead of run_experiments() (#4515) 2019-03-30 14:07:50 -07:00			`},`
[RLlib] Fix all example scripts to run on GPUs. (#11105) 2020-10-02 23:07:44 +02:00			# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
			`"num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),`
[RLlib] Examples folder restructuring (Model examples; final part). (#8278) - This PR completes any previously missing PyTorch Model counterparts to TFModels in examples/models. - It also makes sure, all example scripts in the rllib/examples folder are tested for both frameworks and learn the given task (this is often currently not checked) using a --as-test flag in connection with a --stop-reward. 2020-05-12 08:23:10 +02:00			`"model": {`
			`"custom_model": "my_model",`
[rllib] improve custom env docs (#1447) * env docs * add env * update env * Fri Jan 19 18:55:34 PST 2018 2018-01-19 21:36:18 -08:00			`},`
[RLlib] Examples folder restructuring (Model examples; final part). (#8278) - This PR completes any previously missing PyTorch Model counterparts to TFModels in examples/models. - It also makes sure, all example scripts in the rllib/examples folder are tested for both frameworks and learn the given task (this is often currently not checked) using a --as-test flag in connection with a --stop-reward. 2020-05-12 08:23:10 +02:00			`"vf_share_layers": True,`
			`"lr": grid_search([1e-2, 1e-4, 1e-6]), # try different lrs`
			`"num_workers": 1, # parallelism`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`"framework": "torch" if args.torch else "tf",`
[RLlib] Examples folder restructuring (Model examples; final part). (#8278) - This PR completes any previously missing PyTorch Model counterparts to TFModels in examples/models. - It also makes sure, all example scripts in the rllib/examples folder are tested for both frameworks and learn the given task (this is often currently not checked) using a --as-test flag in connection with a --stop-reward. 2020-05-12 08:23:10 +02:00			`}`

			`stop = {`
			`"training_iteration": args.stop_iters,`
			`"timesteps_total": args.stop_timesteps,`
			`"episode_reward_mean": args.stop_reward,`
			`}`

			`results = tune.run(args.run, config=config, stop=stop)`

			`if args.as_test:`
			`check_learning_achieved(results, args.stop_reward)`
			`ray.shutdown()`