2020-12-31 11:12:17 +08:00
|
|
|
import aiohttp.web
|
2021-11-06 10:37:54 -07:00
|
|
|
import dataclasses
|
2021-10-23 10:48:16 -07:00
|
|
|
from functools import wraps
|
|
|
|
import logging
|
2021-11-06 10:37:54 -07:00
|
|
|
from typing import Any, Callable
|
2021-11-04 11:59:47 -07:00
|
|
|
import json
|
2021-11-06 10:37:54 -07:00
|
|
|
import traceback
|
|
|
|
from dataclasses import dataclass
|
2020-12-31 11:12:17 +08:00
|
|
|
|
2021-10-23 10:48:16 -07:00
|
|
|
import ray
|
2021-09-15 11:17:15 -05:00
|
|
|
import ray.dashboard.utils as dashboard_utils
|
2021-11-02 16:01:54 -05:00
|
|
|
from ray._private.runtime_env.packaging import (package_exists,
|
|
|
|
upload_package_to_gcs)
|
2021-11-10 14:14:55 -08:00
|
|
|
from ray.dashboard.modules.job.common import (
|
|
|
|
GetPackageResponse,
|
|
|
|
JobStatus,
|
|
|
|
JobSubmitRequest,
|
|
|
|
JobSubmitResponse,
|
|
|
|
JobStopResponse,
|
|
|
|
JobStatusResponse,
|
|
|
|
JobLogsResponse,
|
|
|
|
JOBS_API_ROUTE_LOGS,
|
|
|
|
JOBS_API_ROUTE_SUBMIT,
|
|
|
|
JOBS_API_ROUTE_STOP,
|
|
|
|
JOBS_API_ROUTE_STATUS,
|
|
|
|
JOBS_API_ROUTE_PACKAGE,
|
|
|
|
)
|
|
|
|
from ray.dashboard.modules.job.job_manager import JobManager
|
2021-11-02 16:01:54 -05:00
|
|
|
|
2020-12-31 11:12:17 +08:00
|
|
|
# Module-scoped logger.
logger = logging.getLogger(__name__)
# Route table on which the HTTP handlers in this module are registered.
routes = dashboard_utils.ClassMethodRouteTable

# Namespace passed to ray.init() by _ensure_ray_initialized.
RAY_INTERNAL_JOBS_NAMESPACE = "_ray_internal_jobs_"
|
2020-12-31 11:12:17 +08:00
|
|
|
|
2021-10-23 10:48:16 -07:00
|
|
|
|
|
|
|
def _ensure_ray_initialized(f: Callable) -> Callable:
    """Decorate a method so that Ray is initialized before it is called.

    If ``ray.is_initialized()`` is false when the decorated method is
    invoked, ``ray.init`` is called with ``address="auto"`` and the
    ``RAY_INTERNAL_JOBS_NAMESPACE`` namespace first.

    Args:
        f: Method to wrap; it is called as ``f(self, *args, **kwargs)``.

    Returns:
        A plain (non-async) wrapper that returns whatever ``f`` returns.
        When ``f`` is a coroutine function, that is the coroutine object
        for the caller to await.
    """

    @wraps(f)
    def wrapper(self, *args, **kwargs):
        if ray.is_initialized():
            return f(self, *args, **kwargs)

        ray.init(address="auto", namespace=RAY_INTERNAL_JOBS_NAMESPACE)
        return f(self, *args, **kwargs)

    return wrapper
|
2020-12-31 11:12:17 +08:00
|
|
|
|
|
|
|
|
|
|
|
class JobHead(dashboard_utils.DashboardHeadModule):
    """Dashboard head module that serves the job HTTP endpoints.

    Handlers are registered on ``routes`` and delegate to the ``JobManager``
    created in ``run``. When a handler catches an exception it responds with
    an error status code and the formatted traceback as the response body.
    """

    def __init__(self, dashboard_head):
        super().__init__(dashboard_head)

        # Created in run(); the handlers use it without checking for None.
        self._job_manager = None

    @staticmethod
    def _traceback_response(status: int) -> aiohttp.web.Response:
        """Build an error response from the exception being handled.

        Must be called from inside an ``except`` block. The traceback is
        sent as the response body rather than as the HTTP reason phrase:
        the reason phrase is a single line of the status line and aiohttp
        documents ``reason`` as a ``str``, so a multi-line traceback
        (encoded to bytes or not) does not belong there.

        Args:
            status: HTTP status code of the response.

        Returns:
            A response with the given status and the traceback as text.
        """
        return aiohttp.web.Response(
            text=traceback.format_exc(), status=status)

    @staticmethod
    def _json_response(resp: Any) -> aiohttp.web.Response:
        """Serialize a response dataclass instance into a JSON 200 response.

        Args:
            resp: Dataclass instance; converted with ``dataclasses.asdict``
                and ``json.dumps``.

        Returns:
            A response with content type ``application/json``.
        """
        return aiohttp.web.Response(
            text=json.dumps(dataclasses.asdict(resp)),
            content_type="application/json",
            status=aiohttp.web.HTTPOk.status_code)

    async def _parse_and_validate_request(self, req: aiohttp.web.Request,
                                          request_type: type) -> Any:
        """Parse the JSON request body into an instance of ``request_type``.

        Args:
            req: Incoming request; its JSON body is unpacked as keyword
                arguments.
            request_type: Class to instantiate from the decoded body.

        Returns:
            The ``request_type`` instance, or, if decoding or construction
            raised, an ``aiohttp.web.Response`` with status 400 whose body
            is the traceback. Callers tell the two apart with
            ``isinstance``.
        """
        try:
            # TODO: (jiaodong) Validate if job request is valid without using
            # pydantic.
            return request_type(**(await req.json()))
        except Exception:
            return self._traceback_response(
                aiohttp.web.HTTPBadRequest.status_code)

    @routes.get(JOBS_API_ROUTE_PACKAGE)
    @_ensure_ray_initialized
    async def get_package(self,
                          req: aiohttp.web.Request) -> aiohttp.web.Response:
        """Report the result of ``package_exists`` for a package URI.

        Reads the ``package_uri`` query parameter. Responds with a
        JSON-serialized ``GetPackageResponse``, or with status 500 and the
        traceback if ``package_exists`` raised.
        """
        package_uri = req.query["package_uri"]
        try:
            exists = package_exists(package_uri)
        except Exception:
            return self._traceback_response(
                aiohttp.web.HTTPInternalServerError.status_code)

        return self._json_response(GetPackageResponse(package_exists=exists))

    @routes.put(JOBS_API_ROUTE_PACKAGE)
    @_ensure_ray_initialized
    async def upload_package(self, req: aiohttp.web.Request):
        """Pass the request body to ``upload_package_to_gcs``.

        Reads the ``package_uri`` query parameter and the raw request body.
        Responds with an empty 200 on success, or with status 500 and the
        traceback if reading or uploading raised.
        """
        package_uri = req.query["package_uri"]
        logger.info(f"Uploading package {package_uri} to the GCS.")
        try:
            upload_package_to_gcs(package_uri, await req.read())
        except Exception:
            return self._traceback_response(
                aiohttp.web.HTTPInternalServerError.status_code)

        return aiohttp.web.Response(status=aiohttp.web.HTTPOk.status_code)

    @routes.post(JOBS_API_ROUTE_SUBMIT)
    @_ensure_ray_initialized
    async def submit(self, req: aiohttp.web.Request) -> aiohttp.web.Response:
        """Submit a job described by a ``JobSubmitRequest`` JSON body.

        Responds with a JSON-serialized ``JobSubmitResponse`` carrying the
        job id, with status 400 if the body could not be parsed, or with
        status 500 and the traceback if submission raised.
        """
        submit_request = await self._parse_and_validate_request(
            req, JobSubmitRequest)
        # Request parsing failed, returned with Response object.
        if isinstance(submit_request, aiohttp.web.Response):
            return submit_request

        try:
            job_id = self._job_manager.submit_job(
                entrypoint=submit_request.entrypoint,
                job_id=submit_request.job_id,
                runtime_env=submit_request.runtime_env,
                metadata=submit_request.metadata)
            resp = JobSubmitResponse(job_id=job_id)
        except Exception:
            return self._traceback_response(
                aiohttp.web.HTTPInternalServerError.status_code)

        return self._json_response(resp)

    @routes.post(JOBS_API_ROUTE_STOP)
    @_ensure_ray_initialized
    async def stop(self, req: aiohttp.web.Request) -> aiohttp.web.Response:
        """Ask the job manager to stop the job named by ``job_id``.

        Reads the ``job_id`` query parameter. Responds with a
        JSON-serialized ``JobStopResponse``, or with status 500 and the
        traceback if stopping raised.
        """
        job_id = req.query["job_id"]
        try:
            stopped = self._job_manager.stop_job(job_id)
            resp = JobStopResponse(stopped=stopped)
        except Exception:
            return self._traceback_response(
                aiohttp.web.HTTPInternalServerError.status_code)

        return self._json_response(resp)

    @routes.get(JOBS_API_ROUTE_STATUS)
    @_ensure_ray_initialized
    async def status(self, req: aiohttp.web.Request) -> aiohttp.web.Response:
        """Return the job manager's status for the job named by ``job_id``.

        Reads the ``job_id`` query parameter. Responds with a
        JSON-serialized ``JobStatusResponse``, or with status 500 and the
        traceback if the lookup raised (same convention as the other
        handlers in this class).
        """
        job_id = req.query["job_id"]
        try:
            job_status: JobStatus = self._job_manager.get_job_status(job_id)
            resp = JobStatusResponse(job_status=job_status)
        except Exception:
            return self._traceback_response(
                aiohttp.web.HTTPInternalServerError.status_code)

        return self._json_response(resp)

    @routes.get(JOBS_API_ROUTE_LOGS)
    @_ensure_ray_initialized
    async def logs(self, req: aiohttp.web.Request) -> aiohttp.web.Response:
        """Return the job manager's logs for the job named by ``job_id``.

        Reads the ``job_id`` query parameter. Responds with a
        JSON-serialized ``JobLogsResponse``, or with status 500 and the
        traceback if the lookup raised (same convention as the other
        handlers in this class).
        """
        job_id = req.query["job_id"]
        try:
            job_logs: str = self._job_manager.get_job_logs(job_id)
            # TODO(jiaodong): Support log streaming #19415
            resp = JobLogsResponse(logs=job_logs)
        except Exception:
            return self._traceback_response(
                aiohttp.web.HTTPInternalServerError.status_code)

        return self._json_response(resp)

    async def run(self, server):
        """Create the ``JobManager`` if one has not been created yet."""
        if not self._job_manager:
            self._job_manager = JobManager()
|