Retry actor failures in serve failure test (#8282)

This commit is contained in:
Edward Oakes 2020-05-02 10:19:44 -05:00 committed by GitHub
parent 8d3236f1d0
commit 22cab930cd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -6,6 +6,7 @@ import requests
import ray
from ray import serve
from ray.serve.utils import retry_actor_failures
from ray.cluster_utils import Cluster
num_redis_shards = 1
@ -25,7 +26,11 @@ for i in range(num_nodes):
redis_max_memory=redis_max_memory,
webui_host="0.0.0.0")
ray.init(address=cluster.address, include_webui=True, webui_host="0.0.0.0")
ray.init(
address=cluster.address,
include_webui=True,
webui_host="0.0.0.0",
log_to_driver=False)
serve.init(blocking=True)
@ -37,10 +42,11 @@ class RandomKiller:
def _get_all_serve_actors(self):
master = serve.api._get_master_actor()
[router] = ray.get(master.get_router.remote())
[http_proxy] = ray.get(master.get_http_proxy.remote())
[router] = retry_actor_failures(master.get_router)
[http_proxy] = retry_actor_failures(master.get_http_proxy)
all_handles = [master, router, http_proxy]
worker_handle_dict = ray.get(master.get_all_worker_handles.remote())
worker_handle_dict = retry_actor_failures(
master.get_all_worker_handles)
for _, replica_dict in worker_handle_dict.items():
all_handles.extend(list(replica_dict.values()))