"""
Example script showing how to train, save, load, and test a Stable Baselines 2 agent.
Code taken and adjusted from the SB2 docs:
https://stable-baselines.readthedocs.io/en/master/guide/quickstart.html
Equivalent script with RLlib: sb2rllib_rllib_example.py
"""
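# Note: "SB2" here is the TF1-based `stable-baselines` package
# (installable with `pip install stable-baselines`), not Stable Baselines 3.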
import gym

from stable_baselines.common.policies import MlpPolicy
from stable_baselines import PPO2

# settings used for both stable baselines and rllib
env_name = "CartPole-v1"
train_steps = 10000
learning_rate = 1e-3
save_dir = "saved_models"

save_path = f"{save_dir}/sb_model_{train_steps}steps"
env = gym.make(env_name)

# training and saving
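# Only learning_rate and verbose are overridden below; all other PPO2
# hyperparameters (n_steps, clip range, minibatches, ...) keep SB2's defaults.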
model = PPO2(MlpPolicy, env, learning_rate=learning_rate, verbose=1)
model.learn(total_timesteps=train_steps)
model.save(save_path)
print(f"Trained model saved at {save_path}")

# delete and load model (just for illustration)
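# PPO2.load() rebuilds the model from the saved file alone; it does not
# reattach an environment, which is fine here because predict() below only
# needs observations. Pass env=... (or call set_env()) if further training
# were needed.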
del model
model = PPO2.load(save_path)
print(f"Agent loaded from saved model at {save_path}")

# inference
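# predict() returns the action and the policy's recurrent state; the state is
# always None for the feed-forward MlpPolicy used here.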
obs = env.reset()
for i in range(1000):
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)
    env.render()
    if done:
        print(f"Cart pole dropped after {i} steps.")
        break
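
# Clean up the render window once the rollout ends (standard gym practice).
env.close()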