ray/rllib/examples/custom_experiment.py

"""Example of a custom experiment wrapped around an RLlib Algorithm."""
import argparse
import ray
from ray import tune
import ray.rllib.algorithms.ppo as ppo
parser = argparse.ArgumentParser()
parser.add_argument("--train-iterations", type=int, default=10)
def experiment(config):
    iterations = config.pop("train-iterations")

    algo = ppo.PPO(config=config, env="CartPole-v0")
    checkpoint = None
    train_results = {}

    # Train: run the requested number of training iterations, checkpointing
    # every other iteration (and on the last one) and reporting to Tune.
    for i in range(iterations):
        train_results = algo.train()
        if i % 2 == 0 or i == iterations - 1:
            checkpoint = algo.save(tune.get_trial_dir())
        tune.report(**train_results)
    algo.stop()

    # Manual eval: rebuild the algorithm without remote rollout workers,
    # restore the last checkpoint, and evaluate a single episode by hand.
    config["num_workers"] = 0
    eval_algo = ppo.PPO(config=config, env="CartPole-v0")
    eval_algo.restore(checkpoint)
    env = eval_algo.workers.local_worker().env

    obs = env.reset()
    done = False
    eval_results = {"eval_reward": 0, "eval_eps_length": 0}
    while not done:
        action = eval_algo.compute_single_action(obs)
        next_obs, reward, done, info = env.step(action)
        eval_results["eval_reward"] += reward
        eval_results["eval_eps_length"] += 1
        obs = next_obs  # Advance to the next observation.
    results = {**train_results, **eval_results}
    tune.report(**results)

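# Driver: start Ray, build the PPO config, and launch the custom experiment
# function through tune.run(). Run it e.g. via:
#     python custom_experiment.py --train-iterations=5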
if __name__ == "__main__":
args = parser.parse_args()
ray.init(num_cpus=3)
config = ppo.DEFAULT_CONFIG.copy()
config["train-iterations"] = args.train_iterations
config["env"] = "CartPole-v0"
tune.run(
experiment,
config=config,
resources_per_trial=ppo.PPO.default_resource_request(config),
)