ray/rllib/examples/documentation/rllib_in_60s.py
Balaji Veeramani 7f1bacc7dc
[CI] Format Python code with Black (#21975)
See #21316 and #21311 for the motivation behind these changes.
2022-01-29 18:41:57 -08:00

43 lines
1.4 KiB
Python

# __rllib-in-60s-begin__
# Import the RL algorithm (Trainer) we would like to use.
from ray.rllib.agents.ppo import PPOTrainer
# Configure the algorithm.
config = {
    # Environment (RLlib understands OpenAI Gym registered strings).
    "env": "Taxi-v3",
    # Use 2 environment workers (aka "rollout workers") that collect
    # samples in parallel, each from its own environment clone(s).
    "num_workers": 2,
    # Change this to `"framework": "torch"` if you are using PyTorch.
    # Use `"framework": "tf2"` for tf2.x eager execution.
    "framework": "tf",
    # Tweak the default model provided automatically by RLlib,
    # given the environment's observation and action spaces.
    "model": {
        "fcnet_hiddens": [64, 64],
        "fcnet_activation": "relu",
    },
    # Set up a separate evaluation worker set for the
    # `trainer.evaluate()` call after training (see below).
    "evaluation_num_workers": 1,
    # Only for evaluation runs, render the env.
    "evaluation_config": {
        "render_env": True,
    },
}

# Create our RLlib Trainer.
trainer = PPOTrainer(config=config)

# Run it for n training iterations. A training iteration includes
# parallel sample collection by the environment workers as well as
# loss calculation on the collected batch and a model update.
for _ in range(3):
    # Each call runs one training iteration; print what it returns.
    print(trainer.train())

# Evaluate the trained Trainer (and render each timestep to the shell's
# output).
trainer.evaluate()
# __rllib-in-60s-end__