ray/rllib/utils/policy_server.py

97 lines
3.7 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pickle
import sys
import traceback
from ray.rllib.utils.annotations import PublicAPI
from ray.rllib.utils.policy_client import PolicyClient
if sys.version_info[0] == 2:
from SimpleHTTPServer import SimpleHTTPRequestHandler
from SocketServer import TCPServer as HTTPServer
from SocketServer import ThreadingMixIn
elif sys.version_info[0] == 3:
from http.server import SimpleHTTPRequestHandler, HTTPServer
from socketserver import ThreadingMixIn
@PublicAPI
class PolicyServer(ThreadingMixIn, HTTPServer):
"""REST server than can be launched from a ExternalEnv.
This launches a multi-threaded server that listens on the specified host
and port to serve policy requests and forward experiences to RLlib.
Examples:
>>> class CartpoleServing(ExternalEnv):
def __init__(self):
ExternalEnv.__init__(
self, spaces.Discrete(2),
spaces.Box(
low=-10,
high=10,
shape=(4,),
dtype=np.float32))
def run(self):
server = PolicyServer(self, "localhost", 8900)
server.serve_forever()
>>> register_env("srv", lambda _: CartpoleServing())
>>> pg = PGTrainer(env="srv", config={"num_workers": 0})
>>> while True:
pg.train()
>>> client = PolicyClient("localhost:8900")
>>> eps_id = client.start_episode()
>>> action = client.get_action(eps_id, obs)
>>> ...
>>> client.log_returns(eps_id, reward)
>>> ...
>>> client.log_returns(eps_id, reward)
"""
@PublicAPI
def __init__(self, external_env, address, port):
handler = _make_handler(external_env)
HTTPServer.__init__(self, (address, port), handler)
def _make_handler(external_env):
class Handler(SimpleHTTPRequestHandler):
def do_POST(self):
content_len = int(self.headers.get("Content-Length"), 0)
raw_body = self.rfile.read(content_len)
parsed_input = pickle.loads(raw_body)
try:
response = self.execute_command(parsed_input)
self.send_response(200)
self.end_headers()
self.wfile.write(pickle.dumps(response))
except Exception:
self.send_error(500, traceback.format_exc())
def execute_command(self, args):
command = args["command"]
response = {}
if command == PolicyClient.START_EPISODE:
response["episode_id"] = external_env.start_episode(
args["episode_id"], args["training_enabled"])
elif command == PolicyClient.GET_ACTION:
response["action"] = external_env.get_action(
args["episode_id"], args["observation"])
elif command == PolicyClient.LOG_ACTION:
external_env.log_action(args["episode_id"],
args["observation"], args["action"])
elif command == PolicyClient.LOG_RETURNS:
external_env.log_returns(args["episode_id"], args["reward"],
args["info"])
elif command == PolicyClient.END_EPISODE:
external_env.end_episode(args["episode_id"],
args["observation"])
else:
raise Exception("Unknown command: {}".format(command))
return response
return Handler