# ray/rllib/examples/custom_metrics_and_callbacks.py

"""Example of using RLlib's debug callbacks.

Here we use callbacks to track the average CartPole pole angle magnitude as a
custom metric.
"""

import argparse
import numpy as np

import ray
from ray import tune


def on_episode_start(info):
    """Announce a new episode and reset its pole-angle buffers.

    Args:
        info: Callback payload. ``info["episode"]`` must expose
            ``episode_id`` plus the dict-like ``user_data`` and
            ``hist_data`` stores.
    """
    ep = info["episode"]
    print("episode {} started".format(ep.episode_id))
    # Each store gets its own fresh list (scratch buffer first, then the
    # histogram series), so the two never alias at episode start.
    for store in (ep.user_data, ep.hist_data):
        store["pole_angles"] = []


def on_episode_step(info):
    """Append the current step's pole-angle magnitude to the episode buffer.

    Element 2 of both the latest observation and the latest raw
    observation is read; the two magnitudes must match before the value is
    stored in ``episode.user_data["pole_angles"]``.

    Args:
        info: Callback payload carrying the running episode under
            ``"episode"``.

    Raises:
        AssertionError: If the two magnitudes differ.
    """
    ep = info["episode"]
    observed_angle = abs(ep.last_observation_for()[2])
    unprocessed_angle = abs(ep.last_raw_obs_for()[2])
    # Consistency check between the two observation accessors.
    assert observed_angle == unprocessed_angle
    ep.user_data["pole_angles"].append(observed_angle)


def on_episode_end(info):
    """Summarise the finished episode's pole angles.

    The mean of the collected magnitudes is printed and stored as the
    ``"pole_angle"`` custom metric; the full list of magnitudes is
    published under ``hist_data["pole_angles"]``.

    Args:
        info: Callback payload carrying the finished episode under
            ``"episode"``.
    """
    ep = info["episode"]
    angles = ep.user_data["pole_angles"]
    mean_angle = np.mean(angles)
    summary = "episode {} ended with length {} and pole angles {}".format(
        ep.episode_id, ep.length, mean_angle)
    print(summary)
    ep.custom_metrics["pole_angle"] = mean_angle
    # The histogram entry points at the very same list object that was
    # accumulated step by step (no copy is made).
    ep.hist_data["pole_angles"] = angles


def on_sample_end(info):
    """Print the size of the sample batch that was just returned.

    Args:
        info: Callback payload; ``info["samples"].count`` is reported.
    """
    batch_size = info["samples"].count
    print("returned sample batch of size {}".format(batch_size))


def on_train_result(info):
    """Print the per-iteration episode count and tag the result dict.

    Args:
        info: Callback payload with the trainer under ``"trainer"`` and the
            mutable result dict under ``"result"``.
    """
    trainer = info["trainer"]
    result = info["result"]
    print("trainer.train() result: {} -> {} episodes".format(
        trainer, result["episodes_this_iter"]))
    # The result dict is mutated in place; fields added here are returned
    # alongside the existing ones.
    result["callback_ok"] = True


def on_postprocess_traj(info):
    """Count how many postprocessed batches the episode has produced.

    Prints the step count of ``info["post_batch"]`` and bumps the
    ``"num_batches"`` custom metric on the episode, starting it at zero the
    first time it is seen.

    Args:
        info: Callback payload with the episode under ``"episode"`` and the
            postprocessed batch under ``"post_batch"``.
    """
    ep = info["episode"]
    step_count = info["post_batch"].count
    print("postprocessed {} steps".format(step_count))
    metrics = ep.custom_metrics
    metrics["num_batches"] = metrics.get("num_batches", 0) + 1


if __name__ == "__main__":
    # Command line: only the number of training iterations is configurable.
    cli = argparse.ArgumentParser()
    cli.add_argument("--num-iters", type=int, default=2000)
    args = cli.parse_args()

    ray.init()

    # Every callback defined in this file, keyed by its hook name.
    callbacks = {
        "on_episode_start": on_episode_start,
        "on_episode_step": on_episode_step,
        "on_episode_end": on_episode_end,
        "on_sample_end": on_sample_end,
        "on_train_result": on_train_result,
        "on_postprocess_traj": on_postprocess_traj,
    }
    trials = tune.run(
        "PG",
        stop={"training_iteration": args.num_iters},
        config={"env": "CartPole-v0", "callbacks": callbacks},
        return_trials=True)

    # verify custom metrics for integration tests
    last_result = trials[0].last_result
    custom_metrics = last_result["custom_metrics"]
    print(custom_metrics)
    # Keys the callbacks above are expected to have produced in the final
    # result of the first trial.
    for expected_key in ("pole_angle_mean", "pole_angle_min",
                         "pole_angle_max", "num_batches_mean"):
        assert expected_key in custom_metrics
    assert "callback_ok" in last_result
[rllib] Implement custom metrics (#3144) 2018-11-03 18:48:32 -07:00			`"""Example of using RLlib's debug callbacks.`

			`Here we use callbacks to track the average CartPole pole angle magnitude as a`
			`custom metric.`
			`"""`

			`import argparse`
			`import numpy as np`

			`import ray`
			`from ray import tune`


			`def on_episode_start(info):`
			`episode = info["episode"]`
			`print("episode {} started".format(episode.episode_id))`
			`episode.user_data["pole_angles"] = []`
[rllib] Feature/histograms in tensorboard (#6942) * Added histogram functionality to custom metrics infrastructure (another tab in tensorboard) * updated example to include histogram metric * added histograms to TBXLogger * add episode rewards * lint Co-authored-by: Eric Liang <ekhliang@gmail.com> 2020-01-31 08:02:53 +02:00			`episode.hist_data["pole_angles"] = []`
[rllib] Implement custom metrics (#3144) 2018-11-03 18:48:32 -07:00

			`def on_episode_step(info):`
			`episode = info["episode"]`
			`pole_angle = abs(episode.last_observation_for()[2])`
[rllib] Add callback accessor for raw observation, fix prev actions (#4212) 2019-03-06 10:21:05 -08:00			`raw_angle = abs(episode.last_raw_obs_for()[2])`
			`assert pole_angle == raw_angle`
[rllib] Implement custom metrics (#3144) 2018-11-03 18:48:32 -07:00			`episode.user_data["pole_angles"].append(pole_angle)`


			`def on_episode_end(info):`
			`episode = info["episode"]`
[rllib] fixes from dogfooding multi-agent (#3456) auto wrap multi-agent dict and tuple spaces by keeping a policy -> preprocessor in the sampler add some Q-learning debug stats report min, max of custom metrics better errors 2018-12-05 23:31:45 -08:00			`pole_angle = np.mean(episode.user_data["pole_angles"])`
[rllib] Implement custom metrics (#3144) 2018-11-03 18:48:32 -07:00			`print("episode {} ended with length {} and pole angles {}".format(`
[rllib] fixes from dogfooding multi-agent (#3456) auto wrap multi-agent dict and tuple spaces by keeping a policy -> preprocessor in the sampler add some Q-learning debug stats report min, max of custom metrics better errors 2018-12-05 23:31:45 -08:00			`episode.episode_id, episode.length, pole_angle))`
			`episode.custom_metrics["pole_angle"] = pole_angle`
[rllib] Feature/histograms in tensorboard (#6942) * Added histogram functionality to custom metrics infrastructure (another tab in tensorboard) * updated example to include histogram metric * added histograms to TBXLogger * add episode rewards * lint Co-authored-by: Eric Liang <ekhliang@gmail.com> 2020-01-31 08:02:53 +02:00			`episode.hist_data["pole_angles"] = episode.user_data["pole_angles"]`
[rllib] Implement custom metrics (#3144) 2018-11-03 18:48:32 -07:00

			`def on_sample_end(info):`
			`print("returned sample batch of size {}".format(info["samples"].count))`


[rllib] Allow envs to be auto-registered; add on_train_result callback with curriculum example (#3451) * train step and docs * debug * doc * doc * fix examples * fix code * integration test * fix * ... * space * instance * Update .travis.yml * fix test 2018-12-03 23:15:43 -08:00			`def on_train_result(info):`
[rllib] Rename Agent to Trainer (#4556) 2019-04-07 00:36:18 -07:00			`print("trainer.train() result: {} -> {} episodes".format(`
			`info["trainer"], info["result"]["episodes_this_iter"]))`
[rllib] Allow envs to be auto-registered; add on_train_result callback with curriculum example (#3451) * train step and docs * debug * doc * doc * fix examples * fix code * integration test * fix * ... * space * instance * Update .travis.yml * fix test 2018-12-03 23:15:43 -08:00			`# you can mutate the result dict to add new fields to return`
			`info["result"]["callback_ok"] = True`


[rllib] Rename Agent to Trainer (#4556) 2019-04-07 00:36:18 -07:00			`def on_postprocess_traj(info):`
			`episode = info["episode"]`
[rllib] Allow access to batches prior to postprocessing (#4871) 2019-05-29 18:17:14 -07:00			`batch = info["post_batch"]`
[rllib] Rename Agent to Trainer (#4556) 2019-04-07 00:36:18 -07:00			`print("postprocessed {} steps".format(batch.count))`
			`if "num_batches" not in episode.custom_metrics:`
			`episode.custom_metrics["num_batches"] = 0`
			`episode.custom_metrics["num_batches"] += 1`


[rllib] Implement custom metrics (#3144) 2018-11-03 18:48:32 -07:00			`if __name__ == "__main__":`
			`parser = argparse.ArgumentParser()`
			`parser.add_argument("--num-iters", type=int, default=2000)`
			`args = parser.parse_args()`

			`ray.init()`
[rllib] Switch to tune.run() instead of run_experiments() (#4515) 2019-03-30 14:07:50 -07:00			`trials = tune.run(`
			`"PG",`
			`stop={`
			`"training_iteration": args.num_iters,`
			`},`
			`config={`
[rllib] Implement custom metrics (#3144) 2018-11-03 18:48:32 -07:00			`"env": "CartPole-v0",`
[rllib] Switch to tune.run() instead of run_experiments() (#4515) 2019-03-30 14:07:50 -07:00			`"callbacks": {`
[tune] Deprecate tune.function (#5601) * remove tune function * remove examples * Update tune-usage.rst 2019-08-31 16:00:10 -07:00			`"on_episode_start": on_episode_start,`
			`"on_episode_step": on_episode_step,`
			`"on_episode_end": on_episode_end,`
			`"on_sample_end": on_sample_end,`
			`"on_train_result": on_train_result,`
			`"on_postprocess_traj": on_postprocess_traj,`
[rllib] Implement custom metrics (#3144) 2018-11-03 18:48:32 -07:00			`},`
[rllib] Switch to tune.run() instead of run_experiments() (#4515) 2019-03-30 14:07:50 -07:00			`},`
[tune] experiment_analysis split to Analysis (#5115) 2019-07-27 01:10:52 -07:00			`return_trials=True)`
[rllib] Implement custom metrics (#3144) 2018-11-03 18:48:32 -07:00
			`# verify custom metrics for integration tests`
			`custom_metrics = trials[0].last_result["custom_metrics"]`
			`print(custom_metrics)`
[rllib] fixes from dogfooding multi-agent (#3456) auto wrap multi-agent dict and tuple spaces by keeping a policy -> preprocessor in the sampler add some Q-learning debug stats report min, max of custom metrics better errors 2018-12-05 23:31:45 -08:00			`assert "pole_angle_mean" in custom_metrics`
			`assert "pole_angle_min" in custom_metrics`
			`assert "pole_angle_max" in custom_metrics`
[rllib] Rename Agent to Trainer (#4556) 2019-04-07 00:36:18 -07:00			`assert "num_batches_mean" in custom_metrics`
[rllib] Allow envs to be auto-registered; add on_train_result callback with curriculum example (#3451) * train step and docs * debug * doc * doc * fix examples * fix code * integration test * fix * ... * space * instance * Update .travis.yml * fix test 2018-12-03 23:15:43 -08:00			`assert "callback_ok" in trials[0].last_result`