mirror of
https://github.com/vale981/ray
synced 2025-03-08 11:31:40 -05:00

The existing Job info in the cluster snapshot uses the old definition of a Job: a single Ray driver (a single `ray.init()` connection). In the new Job Submission protocol, a Job only specifies an entrypoint, which can be any shell command, so a Job can have zero or multiple Ray drivers. This means we should add a new snapshot entry corresponding to the new jobs. We'll leave the old snapshot entry in place for legacy jobs.
- Also fixes `get_all_jobs` by using the appropriate KV namespace and stripping the job key KV prefix from the job ID. It wasn't working before.
- This PR also unifies the datatype used by the GET `jobs/` endpoint so that it is the same as the one used by the new jobs cluster snapshot. For backwards compatibility, the `status` and `message` fields are preserved.
274 lines
7.9 KiB
JSON
274 lines
7.9 KiB
JSON
{
|
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
"$id": "http://github.com/ray-project/ray/dashboard/modules/snapshot/snapshot_schema.json",
|
|
"type": "object",
|
|
"properties": {
|
|
"result": {
|
|
"type": "boolean"
|
|
},
|
|
"msg": {
|
|
"type": "string"
|
|
},
|
|
"data": {
|
|
"type": "object",
|
|
"properties": {
|
|
"snapshot": {
|
|
"type": "object",
|
|
"properties": {
|
|
"rayVersion": {
|
|
"type": "string"
|
|
},
|
|
"rayCommit": {
|
|
"type": "string"
|
|
},
|
|
"jobs": {
|
|
"type": "object",
|
|
"patternProperties": {
|
|
"[0-9a-f]*": {
|
|
"type": "object",
|
|
"properties": {
|
|
"status": {
|
|
"type": ["string", "null"]
|
|
},
|
|
"status_message": {
|
|
"type": ["string", "null"]
|
|
},
|
|
"isDead": {
|
|
"type": "boolean"
|
|
},
|
|
"startTime": {
|
|
"type": "integer"
|
|
},
|
|
"endTime": {
|
|
"type": "integer"
|
|
},
|
|
"config": {
|
|
"type": "object",
|
|
"properties": {
|
|
"namespace": {
|
|
"type": "string"
|
|
},
|
|
"metadata": {
|
|
"type": "object"
|
|
},
|
|
"runtimeEnv": {
|
|
"type": "object"
|
|
}
|
|
},
|
|
"required": [
|
|
"namespace",
|
|
"metadata",
|
|
"runtimeEnv"
|
|
]
|
|
}
|
|
},
|
|
"required": [
|
|
"isDead",
|
|
"startTime",
|
|
"endTime",
|
|
"config"
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"jobSubmission": {
|
|
"type": "object",
|
|
"patternProperties": {
|
|
"[0-9a-f]*": {
|
|
"type": "object",
|
|
"properties": {
|
|
"status": {
|
|
"type": ["string", "null"]
|
|
},
|
|
"message": {
|
|
"type": ["string", "null"]
|
|
},
|
|
"errorType": {
|
|
"type": ["string", "null"]
|
|
},
|
|
"startTime": {
|
|
"type": ["integer", "null"]
|
|
},
|
|
"endTime": {
|
|
"type": ["integer", "null"]
|
|
},
|
|
"metadata": {
|
|
"type": "object"
|
|
},
|
|
"runtimeEnv": {
|
|
"type": "object"
|
|
}
|
|
},
|
|
"required": [
|
|
"status"
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"actors": {
|
|
"type": "object",
|
|
"patternProperties": {
|
|
"[0-9a-f]*": {
|
|
"jobId": {
|
|
"type": "string"
|
|
},
|
|
"state": {
|
|
"type": "string"
|
|
},
|
|
"name": {
|
|
"type": "string"
|
|
},
|
|
"namespace": {
|
|
"type": "string"
|
|
},
|
|
"runtimeEnv": {
|
|
"type": "string"
|
|
},
|
|
"startTime": {
|
|
"type": "integer"
|
|
},
|
|
"endTime": {
|
|
"type": "integer"
|
|
},
|
|
"isDetached": {
|
|
"type": "boolean"
|
|
},
|
|
"resources": {
|
|
"type": "object",
|
|
"patternProperties": {
|
|
".*": {
|
|
"type": "number"
|
|
}
|
|
}
|
|
},
|
|
"actorClass": {
|
|
"type": "string"
|
|
},
|
|
"currentWorkerId": {
|
|
"type": "string"
|
|
},
|
|
"currentRayletId": {
|
|
"type": "string"
|
|
},
|
|
"ipAddress": {
|
|
"type": "string"
|
|
},
|
|
"port": {
|
|
"type": "integer"
|
|
},
|
|
"metadata": {
|
|
"type": "object",
|
|
"properties": {
|
|
"serve": {
|
|
"type": "object",
|
|
"properties": {
|
|
"deploymentName": {
|
|
"type": "string"
|
|
},
|
|
"replicaTag": {
|
|
"type": "string"
|
|
},
|
|
"version": {
|
|
"type": ["string", "null"]
|
|
}
|
|
},
|
|
"required": [
|
|
"deploymentName",
|
|
"replicaTag",
|
|
"version"
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"required": [
|
|
"jobId",
|
|
"state",
|
|
"name",
|
|
"namespace",
|
|
"runtimeEnv",
|
|
"startTime",
|
|
"endTime",
|
|
"isDetached",
|
|
"resources",
|
|
"currentWorkerId",
|
|
"currentRayletId",
|
|
"actorClass",
|
|
"ipAddress",
|
|
"port"
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"deployments": {
|
|
"type": "object",
|
|
"patternProperties": {
|
|
"[0-9a-f]*": {
|
|
"type": "object",
|
|
"properties": {
|
|
"className": {
|
|
"type": "string"
|
|
},
|
|
"endTime": {
|
|
"type": "integer"
|
|
},
|
|
"startTime": {
|
|
"type": "integer"
|
|
},
|
|
"httpRoute": {
|
|
"type": ["string", "null"]
|
|
},
|
|
"name": {
|
|
"type": "string"
|
|
},
|
|
"namespace": {
|
|
"type": "string"
|
|
},
|
|
"rayJobId": {
|
|
"type": "string"
|
|
},
|
|
"status": {
|
|
"type": "string"
|
|
},
|
|
"version": {
|
|
"type": ["string", "null"]
|
|
}
|
|
},
|
|
"required": [
|
|
"className",
|
|
"startTime",
|
|
"endTime",
|
|
"httpRoute",
|
|
"name",
|
|
"namespace",
|
|
"rayJobId",
|
|
"status",
|
|
"version"
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"sessionName": {
|
|
"type": "string"
|
|
}
|
|
},
|
|
"required": [
|
|
"rayVersion",
|
|
"rayCommit",
|
|
"jobs",
|
|
"jobSubmission",
|
|
"actors",
|
|
"sessionName"
|
|
]
|
|
}
|
|
},
|
|
"required": [
|
|
"snapshot"
|
|
]
|
|
}
|
|
},
|
|
"required": [
|
|
"result",
|
|
"msg",
|
|
"data"
|
|
]
|
|
}
|