mirror of
https://github.com/vale981/ray
synced 2025-03-06 02:21:39 -05:00
[serve] Don't halt main control loop due to exceptions in snapshot logic (#20151)
This commit is contained in:
parent
215f47bc53
commit
39b3eb9763
1 changed file with 11 additions and 2 deletions
|
@@ -181,21 +181,30 @@ class ServeController:
|
|||
deployment_name, new_deployment_info)
|
||||
|
||||
async def run_control_loop(self) -> None:
|
||||
# NOTE(edoakes): we catch all exceptions here and simply log them,
|
||||
# because an unhandled exception would cause the main control loop to
|
||||
# halt, which should *never* happen.
|
||||
while True:
|
||||
try:
|
||||
self.autoscale()
|
||||
except Exception:
|
||||
logger.exception("Exception while autoscaling deployments.")
|
||||
logger.exception("Exception in autoscaling.")
|
||||
|
||||
async with self.write_lock:
|
||||
try:
|
||||
self.http_state.update()
|
||||
except Exception:
|
||||
logger.exception("Exception updating HTTP state.")
|
||||
|
||||
try:
|
||||
self.deployment_state_manager.update()
|
||||
except Exception:
|
||||
logger.exception("Exception updating deployment state.")
|
||||
self._put_serve_snapshot()
|
||||
|
||||
try:
|
||||
self._put_serve_snapshot()
|
||||
except Exception:
|
||||
logger.exception("Exception putting serve snapshot.")
|
||||
await asyncio.sleep(CONTROL_LOOP_PERIOD_S)
|
||||
|
||||
def _put_serve_snapshot(self) -> None:
|
||||
|
|
Loading…
Add table
Reference in a new issue