# __rllib-in-60s-begin__
# Import the RL algorithm (Algorithm) we would like to use.
from ray.rllib.algorithms.ppo import PPO

# Configure the algorithm.
config = {
    # Environment (RLlib understands OpenAI Gym registered strings).
    "env": "Taxi-v3",
    # Use 2 environment workers (aka "rollout workers") that collect samples
    # in parallel from their own environment clone(s).
    "num_workers": 2,
    # Change this to "framework": "torch" if you are using PyTorch.
    # Also, use "framework": "tf2" for TF 2.x eager execution.
    "framework": "tf",
    # Tweak the default model provided automatically by RLlib,
    # given the environment's observation and action spaces.
    "model": {
        "fcnet_hiddens": [64, 64],
        "fcnet_activation": "relu",
    },
    # Set up a separate evaluation worker set for the
    # `algo.evaluate()` call after training (see below).
    "evaluation_num_workers": 1,
    # Only for evaluation runs, render the env.
    "evaluation_config": {
        "render_env": True,
    },
}

# Create our RLlib algorithm instance (called "Trainer" in older RLlib versions).
algo = PPO(config=config)

# Run it for n training iterations. A training iteration includes
# parallel sample collection by the environment workers as well as
# loss calculation on the collected batch and a model update.
for _ in range(3):
    print(algo.train())
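
# An optional aside (a sketch, not part of the original example): each
# `algo.train()` call returns a results dict; "episode_reward_mean" is the
# usual single-number progress metric in RLlib's result format.
result = algo.train()  # Runs one extra training iteration, just to illustrate.
print("Mean episode reward:", result["episode_reward_mean"])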

# Evaluate the trained algorithm (and render each timestep to the shell's
# output).
algo.evaluate()
|
2021-11-10 22:20:06 +01:00
|
|
|
|
|
|
|
# __rllib-in-60s-end__
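
# A possible alternative (a hedged sketch, not part of the example above): the
# same setup expressed with RLlib's config-builder API. This assumes a Ray
# version that ships `PPOConfig` and the fluent `AlgorithmConfig` methods used
# below (roughly Ray >= 2.0); the plain config dict above is what this example
# actually documents.
from ray.rllib.algorithms.ppo import PPOConfig

builder_config = (
    PPOConfig()
    .environment(env="Taxi-v3")
    .rollouts(num_rollout_workers=2)
    .framework("tf")
    .training(model={"fcnet_hiddens": [64, 64], "fcnet_activation": "relu"})
    .evaluation(
        evaluation_num_workers=1,
        evaluation_config={"render_env": True},
    )
)
algo_from_builder = builder_config.build()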