ray/rllib/examples/sb2rllib_rllib_example.py

"""
Example script on how to train, save, load, and test an RLlib agent.
Equivalent script with stable baselines: sb2rllib_sb_example.py.
Demonstrates transition from stable_baselines to Ray RLlib.
Run example: python sb2rllib_rllib_example.py
"""
import gym
import ray
import ray.rllib.algorithms.ppo as ppo
# settings used for both stable baselines and rllib
env_name = "CartPole-v1"
train_steps = 10000
learning_rate = 1e-3
save_dir = "saved_models"
# training and saving
analysis = ray.tune.run(
    "PPO",
    stop={"timesteps_total": train_steps},
    config={"env": env_name, "lr": learning_rate},
    checkpoint_at_end=True,
    local_dir=save_dir,
)
# retrieve the checkpoint path
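# default_metric / default_mode tell get_best_trial() and get_best_checkpoint()
# which metric to rank trials and checkpoints by when none is passed explicitly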
analysis.default_metric = "episode_reward_mean"
analysis.default_mode = "max"
checkpoint_path = analysis.get_best_checkpoint(trial=analysis.get_best_trial())
print(f"Trained model saved at {checkpoint_path}")
# load and restore model
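# a fresh PPO algorithm with its default config is built for the same env;
# restore() then loads the trained state from the checkpoint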
agent = ppo.PPO(env=env_name)
agent.restore(checkpoint_path)
print(f"Agent loaded from saved model at {checkpoint_path}")
# inference
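# note: this example assumes the legacy gym API, where reset() returns only the
# observation and step() returns a 4-tuple (obs, reward, done, info)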
env = gym.make(env_name)
obs = env.reset()
for i in range(1000):
    action = agent.compute_single_action(obs)
    obs, reward, done, info = env.step(action)
    env.render()
    if done:
        print(f"Cart pole dropped after {i} steps.")
        break
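# optional cleanup (a sketch, assuming nothing else in this process still needs
# the env or the Ray runtime): close the render window and stop Ray workers
env.close()
ray.shutdown()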