ray/rllib/utils/policy_server.py
Sven 60d4d5e1aa Remove future imports (#6724)
* Remove all __future__ imports from RLlib.

* Remove (object) again from tf_run_builder.py::TFRunBuilder.

* Fix 2xLINT warnings.

* Fix broken appo_policy import (must be appo_tf_policy)

* Remove future imports from all other ray files (not just RLlib).

* Remove future imports from all other ray files (not just RLlib).

* Remove future import blocks that contain `unicode_literals` as well.
Revert appo_tf_policy.py to appo_policy.py (belongs to another PR).

* Add two empty lines before Schedule class.

* Put back __future__ imports into determine_tests_to_run.py. Fails otherwise on a py2/print related error.
2020-01-09 00:15:48 -08:00

87 lines
3.3 KiB
Python

import pickle
import traceback
from http.server import SimpleHTTPRequestHandler, HTTPServer
from socketserver import ThreadingMixIn
from ray.rllib.utils.annotations import PublicAPI
from ray.rllib.utils.policy_client import PolicyClient
@PublicAPI
class PolicyServer(ThreadingMixIn, HTTPServer):
"""REST server than can be launched from a ExternalEnv.
This launches a multi-threaded server that listens on the specified host
and port to serve policy requests and forward experiences to RLlib.
Examples:
>>> class CartpoleServing(ExternalEnv):
def __init__(self):
ExternalEnv.__init__(
self, spaces.Discrete(2),
spaces.Box(
low=-10,
high=10,
shape=(4,),
dtype=np.float32))
def run(self):
server = PolicyServer(self, "localhost", 8900)
server.serve_forever()
>>> register_env("srv", lambda _: CartpoleServing())
>>> pg = PGTrainer(env="srv", config={"num_workers": 0})
>>> while True:
pg.train()
>>> client = PolicyClient("localhost:8900")
>>> eps_id = client.start_episode()
>>> action = client.get_action(eps_id, obs)
>>> ...
>>> client.log_returns(eps_id, reward)
>>> ...
>>> client.log_returns(eps_id, reward)
"""
@PublicAPI
def __init__(self, external_env, address, port):
handler = _make_handler(external_env)
HTTPServer.__init__(self, (address, port), handler)
def _make_handler(external_env):
class Handler(SimpleHTTPRequestHandler):
def do_POST(self):
content_len = int(self.headers.get("Content-Length"), 0)
raw_body = self.rfile.read(content_len)
parsed_input = pickle.loads(raw_body)
try:
response = self.execute_command(parsed_input)
self.send_response(200)
self.end_headers()
self.wfile.write(pickle.dumps(response))
except Exception:
self.send_error(500, traceback.format_exc())
def execute_command(self, args):
command = args["command"]
response = {}
if command == PolicyClient.START_EPISODE:
response["episode_id"] = external_env.start_episode(
args["episode_id"], args["training_enabled"])
elif command == PolicyClient.GET_ACTION:
response["action"] = external_env.get_action(
args["episode_id"], args["observation"])
elif command == PolicyClient.LOG_ACTION:
external_env.log_action(args["episode_id"],
args["observation"], args["action"])
elif command == PolicyClient.LOG_RETURNS:
external_env.log_returns(args["episode_id"], args["reward"],
args["info"])
elif command == PolicyClient.END_EPISODE:
external_env.end_episode(args["episode_id"],
args["observation"])
else:
raise Exception("Unknown command: {}".format(command))
return response
return Handler